summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dan Goodliffe <dan@randomdan.homeip.net> 2021-09-12 16:52:16 +0100
committer Dan Goodliffe <dan@randomdan.homeip.net> 2021-09-12 16:52:16 +0100
commit c80d3b0191b4e87527e08d883bd446eaec382664 (patch)
tree 1002999bbfe4e8759faaaeff31b5e50e02ce4b51
parent Add another boost test helper (diff)
download libadhocutil-c80d3b0191b4e87527e08d883bd446eaec382664.tar.bz2
libadhocutil-c80d3b0191b4e87527e08d883bd446eaec382664.tar.xz
libadhocutil-c80d3b0191b4e87527e08d883bd446eaec382664.zip
Rewrite UriParse in modern C++
-rw-r--r-- libadhocutil/unittests/testUriParse.cpp 83
-rw-r--r-- libadhocutil/uriParse.cpp 228
2 files changed, 151 insertions, 160 deletions
diff --git a/libadhocutil/unittests/testUriParse.cpp b/libadhocutil/unittests/testUriParse.cpp
index ff49e40..ecb8049 100644
--- a/libadhocutil/unittests/testUriParse.cpp
+++ b/libadhocutil/unittests/testUriParse.cpp
@@ -23,6 +23,19 @@ BOOST_AUTO_TEST_CASE(simple)
BOOST_CHECK(!u.fragment);
}
+BOOST_AUTO_TEST_CASE(simple_ipv6)
+{
+ AdHoc::Uri u("http://[fe80::20e2:d5ff:fed7:c631]");
+ BOOST_CHECK_EQUAL("http", u.scheme);
+ BOOST_CHECK_EQUAL("[fe80::20e2:d5ff:fed7:c631]", u.host);
+ BOOST_CHECK(!u.port);
+ BOOST_CHECK(!u.username);
+ BOOST_CHECK(!u.password);
+ BOOST_CHECK(!u.path);
+ BOOST_CHECK(u.query.empty());
+ BOOST_CHECK(!u.fragment);
+}
+
BOOST_AUTO_TEST_CASE(lowerScheme)
{
AdHoc::Uri u("HtTP://localhost");
@@ -132,7 +145,7 @@ BOOST_AUTO_TEST_CASE(query0)
AdHoc::Uri u("http://localhost/?");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(0, u.query.size());
+ BOOST_CHECK(u.query.empty());
}
BOOST_AUTO_TEST_CASE(query1)
@@ -140,9 +153,10 @@ BOOST_AUTO_TEST_CASE(query1)
AdHoc::Uri u("http://localhost/?var=val");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(1, u.query.size());
- BOOST_CHECK_EQUAL("var", u.query.begin()->first);
- BOOST_CHECK_EQUAL("val", u.query.begin()->second);
+ BOOST_CHECK_EQUAL_IF(1, u.query.size()) {
+ BOOST_CHECK_EQUAL("var", u.query.begin()->first);
+ BOOST_CHECK_EQUAL("val", u.query.begin()->second);
+ }
}
BOOST_AUTO_TEST_CASE(query2)
@@ -150,11 +164,12 @@ BOOST_AUTO_TEST_CASE(query2)
AdHoc::Uri u("http://localhost/?var=val&name=value");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(2, u.query.size());
- BOOST_CHECK_EQUAL("name", u.query.begin()->first);
- BOOST_CHECK_EQUAL("value", u.query.begin()->second);
- BOOST_CHECK_EQUAL("var", u.query.rbegin()->first);
- BOOST_CHECK_EQUAL("val", u.query.rbegin()->second);
+ BOOST_CHECK_EQUAL_IF(2, u.query.size()) {
+ BOOST_CHECK_EQUAL("name", u.query.begin()->first);
+ BOOST_CHECK_EQUAL("value", u.query.begin()->second);
+ BOOST_CHECK_EQUAL("var", u.query.rbegin()->first);
+ BOOST_CHECK_EQUAL("val", u.query.rbegin()->second);
+ }
}
BOOST_AUTO_TEST_CASE(queryMany)
@@ -162,11 +177,12 @@ BOOST_AUTO_TEST_CASE(queryMany)
AdHoc::Uri u("http://localhost/?name=val&name=value");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(2, u.query.size());
- BOOST_CHECK_EQUAL("name", u.query.begin()->first);
- BOOST_CHECK_EQUAL("val", u.query.begin()->second);
- BOOST_CHECK_EQUAL("name", u.query.rbegin()->first);
- BOOST_CHECK_EQUAL("value", u.query.rbegin()->second);
+ BOOST_CHECK_EQUAL_IF(2, u.query.size()) {
+ BOOST_CHECK_EQUAL("name", u.query.begin()->first);
+ BOOST_CHECK_EQUAL("val", u.query.begin()->second);
+ BOOST_CHECK_EQUAL("name", u.query.rbegin()->first);
+ BOOST_CHECK_EQUAL("value", u.query.rbegin()->second);
+ }
}
BOOST_AUTO_TEST_CASE(queryNoValue1)
@@ -174,9 +190,10 @@ BOOST_AUTO_TEST_CASE(queryNoValue1)
AdHoc::Uri u("http://localhost/?n1");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(1, u.query.size());
- BOOST_CHECK_EQUAL("n1", u.query.begin()->first);
- BOOST_CHECK_EQUAL("", u.query.begin()->second);
+ BOOST_CHECK_EQUAL_IF(1, u.query.size()) {
+ BOOST_CHECK_EQUAL("n1", u.query.begin()->first);
+ BOOST_CHECK_EQUAL("", u.query.begin()->second);
+ }
}
BOOST_AUTO_TEST_CASE(queryNoValue1eq)
@@ -184,9 +201,10 @@ BOOST_AUTO_TEST_CASE(queryNoValue1eq)
AdHoc::Uri u("http://localhost/?n1=");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(1, u.query.size());
- BOOST_CHECK_EQUAL("n1", u.query.begin()->first);
- BOOST_CHECK_EQUAL("", u.query.begin()->second);
+ BOOST_CHECK_EQUAL_IF(1, u.query.size()) {
+ BOOST_CHECK_EQUAL("n1", u.query.begin()->first);
+ BOOST_CHECK_EQUAL("", u.query.begin()->second);
+ }
}
BOOST_AUTO_TEST_CASE(queryNoValue2)
@@ -194,11 +212,12 @@ BOOST_AUTO_TEST_CASE(queryNoValue2)
AdHoc::Uri u("http://localhost/?n1=&n2");
BOOST_CHECK_EQUAL("http", u.scheme);
BOOST_CHECK_EQUAL("localhost", u.host);
- BOOST_CHECK_EQUAL(2, u.query.size());
- BOOST_CHECK_EQUAL("n1", u.query.begin()->first);
- BOOST_CHECK_EQUAL("", u.query.begin()->second);
- BOOST_CHECK_EQUAL("n2", u.query.rbegin()->first);
- BOOST_CHECK_EQUAL("", u.query.rbegin()->second);
+ BOOST_CHECK_EQUAL_IF(2, u.query.size()) {
+ BOOST_CHECK_EQUAL("n1", u.query.begin()->first);
+ BOOST_CHECK_EQUAL("", u.query.begin()->second);
+ BOOST_CHECK_EQUAL("n2", u.query.rbegin()->first);
+ BOOST_CHECK_EQUAL("", u.query.rbegin()->second);
+ }
}
BOOST_AUTO_TEST_CASE(fragment)
@@ -239,8 +258,18 @@ BOOST_AUTO_TEST_CASE(bad)
BOOST_CHECK_THROW(AdHoc::Uri("http:/"), AdHoc::InvalidUri);
BOOST_CHECK_THROW(AdHoc::Uri("tcp://"), AdHoc::InvalidUri);
BOOST_CHECK_THROW(AdHoc::Uri("ftp/local"), AdHoc::InvalidUri);
- BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:"), std::bad_cast);
- BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:foo"), std::bad_cast);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:foo"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://:"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp:///"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://[abcd"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:foo/"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:80a/"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:80a"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:-80"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:-1"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:65536"), AdHoc::InvalidUri);
+ BOOST_CHECK_THROW(AdHoc::Uri("tcp://local:/"), AdHoc::InvalidUri);
BOOST_CHECK_THROW(AdHoc::Uri("tcp://user:pass@"), AdHoc::InvalidUri);
AdHoc::InvalidUri ui("message", "http://localhost");
diff --git a/libadhocutil/uriParse.cpp b/libadhocutil/uriParse.cpp
index 489747e..e2cff82 100644
--- a/libadhocutil/uriParse.cpp
+++ b/libadhocutil/uriParse.cpp
@@ -3,178 +3,140 @@
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/lexical_cast.hpp>
#include <cctype>
+#include <charconv>
#include <cstdint>
#include <cstring>
#include <utility>
namespace AdHoc {
- static inline int
- _is_scheme_char(int c)
- {
- return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1;
- }
-
Uri::Uri(const std::string & uri)
{
- auto * puri = this;
- const char * curstr;
- int userpass_flag;
- int bracket_flag;
-
- curstr = uri.c_str();
+ auto is_scheme_char = [](int c) {
+ return (!std::isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1;
+ };
+ auto endor = [](std::string_view haystack, auto needle) {
+ if (const auto n = haystack.find_first_of(needle); n != std::string_view::npos) {
+ return n;
+ }
+ return haystack.length();
+ };
+ auto parsePort = [&uri, this](const std::string_view in) {
+ if (in.empty()) {
+ throw InvalidUri("Invalid format; no port after :", uri);
+ }
+ port.emplace();
+ if (const auto rc = std::from_chars(in.begin(), in.end(), *port);
+ rc.ptr != in.end() || rc.ec != std::errc {}) {
+ throw InvalidUri("Invalid port", uri);
+ }
+ };
- const char * tmpstr = ::strchr(curstr, ':');
- if (!tmpstr) {
- throw InvalidUri("Schema marker not found", uri);
+ std::string_view curstr = uri;
+ if (auto colon = curstr.find(':'); colon == std::string_view::npos) {
+ throw InvalidUri("Scheme marker not found", uri);
}
- auto len = tmpstr - curstr;
- for (decltype(len) i = 0; i < len; i++) {
- if (!_is_scheme_char(curstr[i])) {
- throw InvalidUri("Invalid format", uri);
+ else {
+ if (!std::all_of(curstr.begin(), curstr.begin() + colon, is_scheme_char)) {
+ throw InvalidUri("Invalid format; no scheme end", uri);
}
+ scheme = curstr.substr(0, colon);
+ boost::algorithm::to_lower(scheme);
+ curstr.remove_prefix(colon + 1);
}
- puri->scheme = std::string(curstr, len);
- boost::algorithm::to_lower(puri->scheme);
- tmpstr++;
- curstr = tmpstr;
- for (int i = 0; i < 2; i++) {
- if ('/' != *curstr) {
- throw InvalidUri("Invalid format", uri);
- }
- curstr++;
+ if (!curstr.starts_with("//")) {
+ throw InvalidUri("Invalid format; // not where expected", uri);
}
+ curstr.remove_prefix(2);
- userpass_flag = 0;
- tmpstr = curstr;
- while ('\0' != *tmpstr) {
- if ('@' == *tmpstr) {
- userpass_flag = 1;
- break;
+ if (const auto n = curstr.find_first_of("@/"); n != std::string_view::npos && curstr[n] == '@') {
+ if (const auto colon = curstr.find_first_of("@:"); curstr[colon] == ':') {
+ username = curstr.substr(0, colon);
+ password = curstr.substr(colon + 1, n - colon - 1);
}
- else if ('/' == *tmpstr) {
- userpass_flag = 0;
- break;
+ else {
+ username = curstr.substr(0, n);
}
- tmpstr++;
+ curstr.remove_prefix(n + 1);
}
- tmpstr = curstr;
- if (userpass_flag) {
- while ('\0' != *tmpstr && ':' != *tmpstr && '@' != *tmpstr) {
- tmpstr++;
- }
- len = tmpstr - curstr;
- puri->username = std::string(curstr, len);
- curstr = tmpstr;
- if (':' == *curstr) {
- curstr++;
- tmpstr = curstr;
- while ('\0' != *tmpstr && '@' != *tmpstr) {
- tmpstr++;
- }
- len = tmpstr - curstr;
- puri->password = std::string(curstr, len);
- curstr = tmpstr;
+ if (curstr.empty()) {
+ throw InvalidUri("Invalid format", uri);
+ }
+ if (curstr.starts_with('[')) {
+ if (const auto closeb = curstr.find(']'); closeb == std::string_view::npos) {
+ throw InvalidUri("IPv6 address not terminated", uri);
}
- if ('@' != *curstr) {
- throw InvalidUri("Invalid format", uri);
+ else {
+ host = curstr.substr(0, closeb + 1);
+ curstr.remove_prefix(closeb + 1);
}
- curstr++;
- }
-
- if ('[' == *curstr) {
- bracket_flag = 1;
}
else {
- bracket_flag = 0;
- }
-
- tmpstr = curstr;
- while ('\0' != *tmpstr) {
- if (bracket_flag && ']' == *tmpstr) {
- tmpstr++;
- break;
+ if (const auto hostend = curstr.find_first_of(":/"); hostend == std::string_view::npos) {
+ host = curstr;
+ return;
}
- else if (!bracket_flag && (':' == *tmpstr || '/' == *tmpstr)) {
- break;
+ else {
+ if (hostend == 0) {
+ throw InvalidUri("Host cannot be blank", uri);
+ }
+ host = curstr.substr(0, hostend);
+ boost::algorithm::to_lower(host);
+ curstr.remove_prefix(hostend);
}
- tmpstr++;
- }
- if (tmpstr == curstr) {
- throw InvalidUri("Host cannot be blank", uri);
}
- len = tmpstr - curstr;
- puri->host = std::string(curstr, len);
- boost::algorithm::to_lower(puri->host);
- curstr = tmpstr;
+ boost::algorithm::to_lower(host);
- if (':' == *curstr) {
- curstr++;
- tmpstr = curstr;
- while ('\0' != *tmpstr && '/' != *tmpstr) {
- tmpstr++;
+ if (curstr.empty()) {
+ return;
+ }
+ if (curstr.starts_with(':')) {
+ curstr.remove_prefix(1);
+ if (curstr.empty()) {
+ throw InvalidUri("Invalid format; no port after :", uri);
+ }
+ if (const auto portend = curstr.find('/'); portend == std::string_view::npos) {
+ parsePort(curstr);
+ return;
+ }
+ else {
+ parsePort(curstr.substr(0, portend));
+ curstr.remove_prefix(portend);
}
- len = tmpstr - curstr;
- puri->port = boost::lexical_cast<uint16_t>(std::string(curstr, len));
- curstr = tmpstr;
}
+ curstr.remove_prefix(1);
- if ('\0' == *curstr) {
+ if (const auto pathend = curstr.find_first_of("#?"); pathend == std::string_view::npos) {
+ path = curstr;
return;
}
-
- if ('/' != *curstr) {
- throw InvalidUri("Invalid format", uri);
- }
- curstr++;
-
- tmpstr = curstr;
- while ('\0' != *tmpstr && '#' != *tmpstr && '?' != *tmpstr) {
- tmpstr++;
+ else {
+ path = curstr.substr(0, pathend);
+ curstr.remove_prefix(pathend);
}
- len = tmpstr - curstr;
- puri->path = std::string(curstr, len);
- curstr = tmpstr;
- if ('?' == *curstr) {
- curstr++;
- tmpstr = curstr;
- while ('\0' != *tmpstr && '#' != *tmpstr) {
- while ('\0' != *tmpstr && '#' != *tmpstr && '=' != *tmpstr && '&' != *tmpstr) {
- tmpstr++;
- }
- len = tmpstr - curstr;
- auto q = puri->query.insert({std::string(curstr, len), std::string()});
- curstr = tmpstr;
- if ('=' == *curstr) {
- curstr++;
- while ('\0' != *tmpstr && '#' != *tmpstr && '&' != *tmpstr) {
- tmpstr++;
- }
- len = tmpstr - curstr;
- q->second = std::string(curstr, len);
- curstr = tmpstr;
+ if (curstr.starts_with('?')) {
+ curstr.remove_prefix(1);
+ auto params = curstr.substr(0, endor(curstr, '#'));
+ while (!params.empty()) {
+ const auto pair = params.substr(0, endor(params, '&'));
+ if (const auto eq = pair.find('='); eq != std::string_view::npos) {
+ query.emplace(pair.substr(0, eq), pair.substr(eq + 1));
}
- if ('&' == *tmpstr) {
- tmpstr++;
- curstr = tmpstr;
+ else {
+ query.emplace(pair, std::string {});
}
- else if ('\0' != *tmpstr && '#' != *tmpstr) {
- throw InvalidUri("Parse error in query params", uri);
+ params.remove_prefix(pair.length());
+ if (!params.empty()) {
+ params.remove_prefix(1);
}
}
- curstr = tmpstr;
}
- if ('#' == *curstr) {
- curstr++;
- tmpstr = curstr;
- while ('\0' != *tmpstr) {
- tmpstr++;
- }
- len = tmpstr - curstr;
- puri->fragment = std::string(curstr, len);
+ if (curstr.starts_with('#')) {
+ curstr.remove_prefix(1);
+ fragment = curstr;
}
}