From 080c69c5b77102882a291110b6571bbef8ef8ffd Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Sat, 15 Oct 2016 14:55:49 +0100 Subject: Test and fix multibyte behaviour in lexer-regex --- libadhocutil/lexer-regex.cpp | 2 +- libadhocutil/unittests/testLexer.cpp | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/libadhocutil/lexer-regex.cpp b/libadhocutil/lexer-regex.cpp index 9ffee8b..2e3d7cf 100644 --- a/libadhocutil/lexer-regex.cpp +++ b/libadhocutil/lexer-regex.cpp @@ -56,7 +56,7 @@ namespace AdHoc { if (start == -1 && end == -1) { return boost::optional<Glib::ustring>(); } - return Glib::ustring(str + start, end - start); + return Glib::ustring(str + start, str + end); } return boost::optional<Glib::ustring>(); } diff --git a/libadhocutil/unittests/testLexer.cpp b/libadhocutil/unittests/testLexer.cpp index a8bc6ce..0e23586 100644 --- a/libadhocutil/unittests/testLexer.cpp +++ b/libadhocutil/unittests/testLexer.cpp @@ -73,6 +73,25 @@ BOOST_AUTO_TEST_CASE( state ) BOOST_REQUIRE_EQUAL("aa", s); } +BOOST_AUTO_TEST_CASE( multibyte ) +{ + AdHoc::Lexer::PatternPtr maskHead = AdHoc::LexerMatchers::regex( + "^# ([^<\n]+)? ?(<(.+?@[^\n>]+)>?)? 
\\((\\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\w* \\d+)\\)$", + (GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_CASELESS | G_REGEX_UNGREEDY)); + Glib::ustring input("# Michał Górny <mgorny@gentoo.org> (28 Mar 2015)"); + BOOST_REQUIRE(input.bytes() > input.length()); + BOOST_REQUIRE(maskHead->matches(input.c_str(), input.bytes(), 0)); + BOOST_REQUIRE_EQUAL(maskHead->matchedLength(), input.bytes()); + BOOST_REQUIRE(maskHead->match(1)); + BOOST_REQUIRE_EQUAL("Michał Górny", *maskHead->match(1)); + BOOST_REQUIRE(maskHead->match(2)); + BOOST_REQUIRE_EQUAL("<mgorny@gentoo.org>", *maskHead->match(2)); + BOOST_REQUIRE(maskHead->match(3)); + BOOST_REQUIRE_EQUAL("mgorny@gentoo.org", *maskHead->match(3)); + BOOST_REQUIRE(maskHead->match(4)); + BOOST_REQUIRE_EQUAL("28 Mar 2015", *maskHead->match(4)); +} + BOOST_AUTO_TEST_CASE( badre ) { BOOST_REQUIRE_THROW(regex("["), std::runtime_error); -- cgit v1.2.3