From c80d3b0191b4e87527e08d883bd446eaec382664 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Sun, 12 Sep 2021 16:52:16 +0100 Subject: Rewrite UriParse in modern C++ --- libadhocutil/unittests/testUriParse.cpp | 83 ++++++++---- libadhocutil/uriParse.cpp | 228 +++++++++++++------------------- 2 files changed, 151 insertions(+), 160 deletions(-) diff --git a/libadhocutil/unittests/testUriParse.cpp b/libadhocutil/unittests/testUriParse.cpp index ff49e40..ecb8049 100644 --- a/libadhocutil/unittests/testUriParse.cpp +++ b/libadhocutil/unittests/testUriParse.cpp @@ -23,6 +23,19 @@ BOOST_AUTO_TEST_CASE(simple) BOOST_CHECK(!u.fragment); } +BOOST_AUTO_TEST_CASE(simple_ipv6) +{ + AdHoc::Uri u("http://[fe80::20e2:d5ff:fed7:c631]"); + BOOST_CHECK_EQUAL("http", u.scheme); + BOOST_CHECK_EQUAL("[fe80::20e2:d5ff:fed7:c631]", u.host); + BOOST_CHECK(!u.port); + BOOST_CHECK(!u.username); + BOOST_CHECK(!u.password); + BOOST_CHECK(!u.path); + BOOST_CHECK(u.query.empty()); + BOOST_CHECK(!u.fragment); +} + BOOST_AUTO_TEST_CASE(lowerScheme) { AdHoc::Uri u("HtTP://localhost"); @@ -132,7 +145,7 @@ BOOST_AUTO_TEST_CASE(query0) AdHoc::Uri u("http://localhost/?"); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(0, u.query.size()); + BOOST_CHECK(u.query.empty()); } BOOST_AUTO_TEST_CASE(query1) @@ -140,9 +153,10 @@ BOOST_AUTO_TEST_CASE(query1) AdHoc::Uri u("http://localhost/?var=val"); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(1, u.query.size()); - BOOST_CHECK_EQUAL("var", u.query.begin()->first); - BOOST_CHECK_EQUAL("val", u.query.begin()->second); + BOOST_CHECK_EQUAL_IF(1, u.query.size()) { + BOOST_CHECK_EQUAL("var", u.query.begin()->first); + BOOST_CHECK_EQUAL("val", u.query.begin()->second); + } } BOOST_AUTO_TEST_CASE(query2) @@ -150,11 +164,12 @@ BOOST_AUTO_TEST_CASE(query2) AdHoc::Uri u("http://localhost/?var=val&name=value"); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(2, u.query.size()); - BOOST_CHECK_EQUAL("name", u.query.begin()->first); - BOOST_CHECK_EQUAL("value", u.query.begin()->second); - BOOST_CHECK_EQUAL("var", u.query.rbegin()->first); - BOOST_CHECK_EQUAL("val", u.query.rbegin()->second); + BOOST_CHECK_EQUAL_IF(2, u.query.size()) { + BOOST_CHECK_EQUAL("name", u.query.begin()->first); + BOOST_CHECK_EQUAL("value", u.query.begin()->second); + BOOST_CHECK_EQUAL("var", u.query.rbegin()->first); + BOOST_CHECK_EQUAL("val", u.query.rbegin()->second); + } } BOOST_AUTO_TEST_CASE(queryMany) @@ -162,11 +177,12 @@ BOOST_AUTO_TEST_CASE(queryMany) AdHoc::Uri u("http://localhost/?name=val&name=value"); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(2, u.query.size()); - BOOST_CHECK_EQUAL("name", u.query.begin()->first); - BOOST_CHECK_EQUAL("val", u.query.begin()->second); - BOOST_CHECK_EQUAL("name", u.query.rbegin()->first); - BOOST_CHECK_EQUAL("value", u.query.rbegin()->second); + BOOST_CHECK_EQUAL_IF(2, u.query.size()) { + BOOST_CHECK_EQUAL("name", u.query.begin()->first); + BOOST_CHECK_EQUAL("val", u.query.begin()->second); + BOOST_CHECK_EQUAL("name", u.query.rbegin()->first); + BOOST_CHECK_EQUAL("value", u.query.rbegin()->second); + } } BOOST_AUTO_TEST_CASE(queryNoValue1) @@ -174,9 +190,10 @@ BOOST_AUTO_TEST_CASE(queryNoValue1) AdHoc::Uri u("http://localhost/?n1"); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(1, u.query.size()); - BOOST_CHECK_EQUAL("n1", u.query.begin()->first); - BOOST_CHECK_EQUAL("", u.query.begin()->second); + BOOST_CHECK_EQUAL_IF(1, u.query.size()) { + BOOST_CHECK_EQUAL("n1", u.query.begin()->first); + BOOST_CHECK_EQUAL("", u.query.begin()->second); + } } BOOST_AUTO_TEST_CASE(queryNoValue1eq) @@ -184,9 +201,10 @@ BOOST_AUTO_TEST_CASE(queryNoValue1eq) AdHoc::Uri u("http://localhost/?n1="); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(1, u.query.size()); - BOOST_CHECK_EQUAL("n1", u.query.begin()->first); - BOOST_CHECK_EQUAL("", u.query.begin()->second); + BOOST_CHECK_EQUAL_IF(1, u.query.size()) { + BOOST_CHECK_EQUAL("n1", u.query.begin()->first); + BOOST_CHECK_EQUAL("", u.query.begin()->second); + } } BOOST_AUTO_TEST_CASE(queryNoValue2) @@ -194,11 +212,12 @@ BOOST_AUTO_TEST_CASE(queryNoValue2) AdHoc::Uri u("http://localhost/?n1=&n2"); BOOST_CHECK_EQUAL("http", u.scheme); BOOST_CHECK_EQUAL("localhost", u.host); - BOOST_CHECK_EQUAL(2, u.query.size()); - BOOST_CHECK_EQUAL("n1", u.query.begin()->first); - BOOST_CHECK_EQUAL("", u.query.begin()->second); - BOOST_CHECK_EQUAL("n2", u.query.rbegin()->first); - BOOST_CHECK_EQUAL("", u.query.rbegin()->second); + BOOST_CHECK_EQUAL_IF(2, u.query.size()) { + BOOST_CHECK_EQUAL("n1", u.query.begin()->first); + BOOST_CHECK_EQUAL("", u.query.begin()->second); + BOOST_CHECK_EQUAL("n2", u.query.rbegin()->first); + BOOST_CHECK_EQUAL("", u.query.rbegin()->second); + } } BOOST_AUTO_TEST_CASE(fragment) @@ -239,8 +258,18 @@ BOOST_AUTO_TEST_CASE(bad) BOOST_CHECK_THROW(AdHoc::Uri("http:/"), AdHoc::InvalidUri); BOOST_CHECK_THROW(AdHoc::Uri("tcp://"), AdHoc::InvalidUri); BOOST_CHECK_THROW(AdHoc::Uri("ftp/local"), AdHoc::InvalidUri); - BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:"), std::bad_cast); - BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:foo"), std::bad_cast); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:foo"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://:"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp:///"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://[abcd"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:foo/"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:80a/"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:80a"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:-80"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:-1"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:65536"), AdHoc::InvalidUri); + BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:/"), AdHoc::InvalidUri); BOOST_CHECK_THROW(AdHoc::Uri("tcp://user:pass@"), AdHoc::InvalidUri); AdHoc::InvalidUri ui("message", "http://localhost"); diff --git a/libadhocutil/uriParse.cpp b/libadhocutil/uriParse.cpp index 489747e..e2cff82 100644 --- a/libadhocutil/uriParse.cpp +++ b/libadhocutil/uriParse.cpp @@ -3,178 +3,140 @@ #include #include #include +#include #include #include #include namespace AdHoc { - static inline int - _is_scheme_char(int c) - { - return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1; - } - Uri::Uri(const std::string & uri) { - auto * puri = this; - const char * curstr; - int userpass_flag; - int bracket_flag; - - curstr = uri.c_str(); + auto is_scheme_char = [](int c) { + return (!std::isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1; + }; + auto endor = [](std::string_view haystack, auto needle) { + if (const auto n = haystack.find_first_of(needle); n != std::string_view::npos) { + return n; + } + return haystack.length(); + }; + auto parsePort = [&uri, this](const std::string_view in) { + if (in.empty()) { + throw InvalidUri("Invalid format; no port after :", uri); + } + port.emplace(); + if (const auto rc = std::from_chars(in.begin(), in.end(), *port); + rc.ptr != in.end() || rc.ec != std::errc {}) { + throw InvalidUri("Invalid port", uri); + } + }; - const char * tmpstr = ::strchr(curstr, ':'); - if (!tmpstr) { - throw InvalidUri("Schema marker not found", uri); + std::string_view curstr = uri; + if (auto colon = curstr.find(':'); colon == std::string_view::npos) { + throw InvalidUri("Scheme marker not found", uri); } - auto len = tmpstr - curstr; - for (decltype(len) i = 0; i < len; i++) { - if (!_is_scheme_char(curstr[i])) { - throw InvalidUri("Invalid format", uri); + else { + if (!std::all_of(curstr.begin(), curstr.begin() + colon, is_scheme_char)) { + throw InvalidUri("Invalid format; no scheme end", uri); } + scheme = curstr.substr(0, colon); + boost::algorithm::to_lower(scheme); + curstr.remove_prefix(colon + 1); } - puri->scheme = std::string(curstr, len); - boost::algorithm::to_lower(puri->scheme); - tmpstr++; - curstr = tmpstr; - for (int i = 0; i < 2; i++) { - if ('/' != *curstr) { - throw InvalidUri("Invalid format", uri); - } - curstr++; + if (!curstr.starts_with("//")) { + throw InvalidUri("Invalid format; // not where expected", uri); } + curstr.remove_prefix(2); - userpass_flag = 0; - tmpstr = curstr; - while ('\0' != *tmpstr) { - if ('@' == *tmpstr) { - userpass_flag = 1; - break; + if (const auto n = curstr.find_first_of("@/"); n != std::string_view::npos && curstr[n] == '@') { + if (const auto colon = curstr.find_first_of("@:"); curstr[colon] == ':') { + username = curstr.substr(0, colon); + password = curstr.substr(colon + 1, n - colon - 1); } - else if ('/' == *tmpstr) { - userpass_flag = 0; - break; + else { + username = curstr.substr(0, n); } - tmpstr++; + curstr.remove_prefix(n + 1); } - tmpstr = curstr; - if (userpass_flag) { - while ('\0' != *tmpstr && ':' != *tmpstr && '@' != *tmpstr) { - tmpstr++; - } - len = tmpstr - curstr; - puri->username = std::string(curstr, len); - curstr = tmpstr; - if (':' == *curstr) { - curstr++; - tmpstr = curstr; - while ('\0' != *tmpstr && '@' != *tmpstr) { - tmpstr++; - } - len = tmpstr - curstr; - puri->password = std::string(curstr, len); - curstr = tmpstr; + if (curstr.empty()) { + throw InvalidUri("Invalid format", uri); + } + if (curstr.starts_with('[')) { + if (const auto closeb = curstr.find(']'); closeb == std::string_view::npos) { + throw InvalidUri("IPv6 address not terminated", uri); } - if ('@' != *curstr) { - throw InvalidUri("Invalid format", uri); + else { + host = curstr.substr(0, closeb + 1); + curstr.remove_prefix(closeb + 1); } - curstr++; - } - - if ('[' == *curstr) { - bracket_flag = 1; } else { - bracket_flag = 0; - } - - tmpstr = curstr; - while ('\0' != *tmpstr) { - if (bracket_flag && ']' == *tmpstr) { - tmpstr++; - break; + if (const auto hostend = curstr.find_first_of(":/"); hostend == std::string_view::npos) { + host = curstr; + return; } - else if (!bracket_flag && (':' == *tmpstr || '/' == *tmpstr)) { - break; + else { + if (hostend == 0) { + throw InvalidUri("Host cannot be blank", uri); + } + host = curstr.substr(0, hostend); + boost::algorithm::to_lower(host); + curstr.remove_prefix(hostend); } - tmpstr++; - } - if (tmpstr == curstr) { - throw InvalidUri("Host cannot be blank", uri); } - len = tmpstr - curstr; - puri->host = std::string(curstr, len); - boost::algorithm::to_lower(puri->host); - curstr = tmpstr; + boost::algorithm::to_lower(host); - if (':' == *curstr) { - curstr++; - tmpstr = curstr; - while ('\0' != *tmpstr && '/' != *tmpstr) { - tmpstr++; + if (curstr.empty()) { + return; + } + if (curstr.starts_with(':')) { + curstr.remove_prefix(1); + if (curstr.empty()) { + throw InvalidUri("Invalid format; no port after :", uri); + } + if (const auto portend = curstr.find('/'); portend == std::string_view::npos) { + parsePort(curstr); + return; + } + else { + parsePort(curstr.substr(0, portend)); + curstr.remove_prefix(portend); } - len = tmpstr - curstr; - puri->port = boost::lexical_cast(std::string(curstr, len)); - curstr = tmpstr; } + curstr.remove_prefix(1); - if ('\0' == *curstr) { + if (const auto pathend = curstr.find_first_of("#?"); pathend == std::string_view::npos) { + path = curstr; return; } - - if ('/' != *curstr) { - throw InvalidUri("Invalid format", uri); - } - curstr++; - - tmpstr = curstr; - while ('\0' != *tmpstr && '#' != *tmpstr && '?' != *tmpstr) { - tmpstr++; + else { + path = curstr.substr(0, pathend); + curstr.remove_prefix(pathend); } - len = tmpstr - curstr; - puri->path = std::string(curstr, len); - curstr = tmpstr; - if ('?' == *curstr) { - curstr++; - tmpstr = curstr; - while ('\0' != *tmpstr && '#' != *tmpstr) { - while ('\0' != *tmpstr && '#' != *tmpstr && '=' != *tmpstr && '&' != *tmpstr) { - tmpstr++; - } - len = tmpstr - curstr; - auto q = puri->query.insert({std::string(curstr, len), std::string()}); - curstr = tmpstr; - if ('=' == *curstr) { - curstr++; - while ('\0' != *tmpstr && '#' != *tmpstr && '&' != *tmpstr) { - tmpstr++; - } - len = tmpstr - curstr; - q->second = std::string(curstr, len); - curstr = tmpstr; + if (curstr.starts_with('?')) { + curstr.remove_prefix(1); + auto params = curstr.substr(0, endor(curstr, '#')); + while (!params.empty()) { + const auto pair = params.substr(0, endor(params, '&')); + if (const auto eq = pair.find('='); eq != std::string_view::npos) { + query.emplace(pair.substr(0, eq), pair.substr(eq + 1)); } - if ('&' == *tmpstr) { - tmpstr++; - curstr = tmpstr; + else { + query.emplace(pair, std::string {}); } - else if ('\0' != *tmpstr && '#' != *tmpstr) { - throw InvalidUri("Parse error in query params", uri); + params.remove_prefix(pair.length()); + if (!params.empty()) { + params.remove_prefix(1); } } - curstr = tmpstr; } - if ('#' == *curstr) { - curstr++; - tmpstr = curstr; - while ('\0' != *tmpstr) { - tmpstr++; - } - len = tmpstr - curstr; - puri->fragment = std::string(curstr, len); + if (curstr.starts_with('#')) { + curstr.remove_prefix(1); + fragment = curstr; } } -- cgit v1.2.3