From af336048f7f053f1c27e9c87530b2300ab20b015 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Mon, 4 Dec 2017 20:32:55 +0000 Subject: Assorted improvements to he SQL lexer to better handle whitespace and comments mixed into statements --- libdbpp/sqlParse.ll | 38 +++++++++++++++++---------- libdbpp/unittests/Jamfile.jam | 4 +++ libdbpp/unittests/commentsMixedIn.sql | 7 +++++ libdbpp/unittests/indentedBlockComment.sql | 5 ++++ libdbpp/unittests/indentedOneLineComment.sql | 5 ++++ libdbpp/unittests/indentedStatement.sql | 5 ++++ libdbpp/unittests/testParse.cpp | 39 ++++++++++++++++++++++++++++ 7 files changed, 89 insertions(+), 14 deletions(-) create mode 100644 libdbpp/unittests/commentsMixedIn.sql create mode 100644 libdbpp/unittests/indentedBlockComment.sql create mode 100644 libdbpp/unittests/indentedOneLineComment.sql create mode 100644 libdbpp/unittests/indentedStatement.sql diff --git a/libdbpp/sqlParse.ll b/libdbpp/sqlParse.ll index f9c1525..835ce20 100644 --- a/libdbpp/sqlParse.ll +++ b/libdbpp/sqlParse.ll @@ -14,9 +14,9 @@ space [ \t\n\r\f] non_newline [^\r\n] -mcomment_start "/*" -mcomment_stop "*/" -comment ("--"{non_newline}*) +mcomment_start ("/*"{space}*) +mcomment_stop ({space}*"*/") +lcomment_start ({space}*"--"{space}*) other . term ; any ({other}|{space}) @@ -28,41 +28,54 @@ dolq_cont [A-Za-z\200-\377_0-9] dollarquote \$({dolq_start}{dolq_cont}*)?\$ scriptdir "$SCRIPTDIR" -%x COMMENT +%x MCOMMENT +%x LCOMMENT %x STATEMENT %x QUOTE %x DOLLARQUOTE %% +{mcomment_start} { + yy_push_state(MCOMMENT); +} + {mcomment_start} { - comment += YYText(); - yy_push_state(COMMENT); + yy_push_state(MCOMMENT); } -{mcomment_stop} { - comment += YYText(); +{lcomment_start} { + yy_push_state(LCOMMENT); +} + +{lcomment_start} { + yy_push_state(LCOMMENT); +} + +{mcomment_stop} { Comment(comment); comment.clear(); yy_pop_state(); } -{any} { +{any} { comment += YYText(); } -<> { +<> { throw SqlParseException("Unterminated comment", yylineno); } -{comment} { +{non_newline}* { Comment(YYText()); + yy_pop_state(); } {term} { // Random terminator } +{space} { } {other} { statement += YYText(); yy_push_state(STATEMENT); @@ -126,6 +139,3 @@ scriptdir "$SCRIPTDIR" statement += YYText(); } -<*>[ \t\r\n\f] { -} - diff --git a/libdbpp/unittests/Jamfile.jam b/libdbpp/unittests/Jamfile.jam index d0500f0..83e1ad4 100644 --- a/libdbpp/unittests/Jamfile.jam +++ b/libdbpp/unittests/Jamfile.jam @@ -29,6 +29,10 @@ run ..//dbppcore ..//adhocutil boost_utf + commentsMixedIn.sql + indentedStatement.sql + indentedOneLineComment.sql + indentedBlockComment.sql parseTest.sql unterminatedComment.sql unterminatedString.sql diff --git a/libdbpp/unittests/commentsMixedIn.sql b/libdbpp/unittests/commentsMixedIn.sql new file mode 100644 index 0000000..1766309 --- /dev/null +++ b/libdbpp/unittests/commentsMixedIn.sql @@ -0,0 +1,7 @@ +CREATE TABLE foo(/* + Foo contains test things + */ + id int, -- Every table deserves an Id, right? + timestamp time stamp -- And a timestamp + ); + diff --git a/libdbpp/unittests/indentedBlockComment.sql b/libdbpp/unittests/indentedBlockComment.sql new file mode 100644 index 0000000..45294a0 --- /dev/null +++ b/libdbpp/unittests/indentedBlockComment.sql @@ -0,0 +1,5 @@ + + + /* Some comment text + */ + diff --git a/libdbpp/unittests/indentedOneLineComment.sql b/libdbpp/unittests/indentedOneLineComment.sql new file mode 100644 index 0000000..d72bb31 --- /dev/null +++ b/libdbpp/unittests/indentedOneLineComment.sql @@ -0,0 +1,5 @@ + + + -- Some comment text + + diff --git a/libdbpp/unittests/indentedStatement.sql b/libdbpp/unittests/indentedStatement.sql new file mode 100644 index 0000000..4273f35 --- /dev/null +++ b/libdbpp/unittests/indentedStatement.sql @@ -0,0 +1,5 @@ + + + SELECT 1; + + diff --git a/libdbpp/unittests/testParse.cpp b/libdbpp/unittests/testParse.cpp index 1272f9b..093cc7d 100644 --- a/libdbpp/unittests/testParse.cpp +++ b/libdbpp/unittests/testParse.cpp @@ -78,6 +78,45 @@ BOOST_AUTO_TEST_CASE( parseStringParse ) BOOST_REQUIRE_EQUAL("INSERT INTO name(t, i) VALUES('fancy string '' \\' \\r \\n', 7)", p.executed[1]); } +BOOST_AUTO_TEST_CASE( indentedStatement ) +{ + RecordingParser p(rootDir / "indentedStatement.sql"); + p.Execute(); + BOOST_REQUIRE_EQUAL(1, p.executed.size()); + BOOST_REQUIRE_EQUAL("SELECT 1", p.executed[0]); + BOOST_REQUIRE(p.comments.empty()); +} + +BOOST_AUTO_TEST_CASE( indentedOneLineComment ) +{ + RecordingParser p(rootDir / "indentedOneLineComment.sql"); + p.Execute(); + BOOST_REQUIRE_EQUAL(1, p.comments.size()); + BOOST_REQUIRE_EQUAL("Some comment text", p.comments[0]); + BOOST_REQUIRE(p.executed.empty()); +} + +BOOST_AUTO_TEST_CASE( indentedBlockComment ) +{ + RecordingParser p(rootDir / "indentedBlockComment.sql"); + p.Execute(); + BOOST_REQUIRE_EQUAL(1, p.comments.size()); + BOOST_REQUIRE_EQUAL("Some comment text", p.comments[0]); + BOOST_REQUIRE(p.executed.empty()); +} + +BOOST_AUTO_TEST_CASE( commentsMixedIn ) +{ + RecordingParser p(rootDir / "commentsMixedIn.sql"); + p.Execute(); + BOOST_REQUIRE_EQUAL(1, p.executed.size()); + BOOST_REQUIRE_EQUAL("CREATE TABLE foo(\n\t\tid int,\n\t\ttimestamp time stamp\n\t\t)", p.executed[0]); + BOOST_REQUIRE_EQUAL(3, p.comments.size()); + BOOST_REQUIRE_EQUAL("Foo contains test things", p.comments[0]); + BOOST_REQUIRE_EQUAL("Every table deserves an Id, right?", p.comments[1]); + BOOST_REQUIRE_EQUAL("And a timestamp", p.comments[2]); +} + BOOST_AUTO_TEST_CASE( parseUnterminateComment ) { assertFail(rootDir / "unterminatedComment.sql"); -- cgit v1.2.3