summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-03-31 15:34:39 +0000
committer Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-03-31 15:34:39 +0000
commit 013342a0470e7a5b456921010009c38a4a0c828f (patch)
tree 83f29fbb787404d8434681a74955253c11f1108f /document
parent bc693476136615bb44b9c4705cd959e53fee1afd (diff)
Compile Flex lexer with options for better code generation
Diffstat (limited to 'document')
-rw-r--r-- document/src/tests/documentselectparsertest.cpp 10
-rw-r--r-- document/src/vespa/document/select/grammar/lexer.ll 7
2 files changed, 17 insertions, 0 deletions
diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp
index c8cbd1be9c2..fce0d90db72 100644
--- a/document/src/tests/documentselectparsertest.cpp
+++ b/document/src/tests/documentselectparsertest.cpp
@@ -1653,4 +1653,14 @@ TEST_F(DocumentSelectParserTest, selection_has_upper_limit_on_input_size) {
verifyFailedParse(expr, "ParsingFailedException: expression is too large to be parsed");
}
+TEST_F(DocumentSelectParserTest, lexing_does_not_have_superlinear_time_complexity) {
+ createDocs();
+ std::string expr = ("testdoctype1.hstringval == 'a_biii"
+ + std::string(select::ParserLimits::MaxSelectionByteSize - 100, 'i')
+ + "iiig string'");
+ // The quoted string is presumably lexed as one huge token. If the lexer is not compiled
+ // with the appropriate options, this will take a long time. A really, really long time.
+ PARSE(expr, *_doc[0], False);
+}
+
} // document
diff --git a/document/src/vespa/document/select/grammar/lexer.ll b/document/src/vespa/document/select/grammar/lexer.ll
index bd011c8ebf6..1222aac02a2 100644
--- a/document/src/vespa/document/select/grammar/lexer.ll
+++ b/document/src/vespa/document/select/grammar/lexer.ll
@@ -7,6 +7,13 @@
%option noyywrap nounput
%option yyclass="document::select::DocSelScanner"
+ /* Flex lexer must be compiled with batch mode (as opposed to interactive mode)
+ * or parsing of large tokens appears to trigger superlinear time complexity.
+ * Also use full, non-compressed lookup tables for maximum performance.
+ */
+%option batch
+%option full
+
/* Used to track source locations, see https://github.com/bingmann/flex-bison-cpp-example/blob/master/src/scanner.ll */
%{
#define YY_USER_ACTION yyloc->columns(yyleng);