diff options
author | Dan Goodliffe <dan.goodliffe@octal.co.uk> | 2025-10-09 14:26:40 +0100 |
---|---|---|
committer | Dan Goodliffe <dan.goodliffe@octal.co.uk> | 2025-10-09 14:26:40 +0100 |
commit | a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3 (patch) | |
tree | c3a2f2f7dfc7c0e5d63964a0482a78d2aa13306a /src | |
parent | bc05a99df780313ba3cc298323e98beaa5b0ba54 (diff) | |
download | webstat-a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3.tar.bz2 webstat-a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3.tar.xz webstat-a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3.zip |
Fix up QuotedString/CLFString parsing
Refactors CLFString in terms of QuotedString, but with the option of
being null (nullopt)
Moves the whole decode function into QuotedString's parser, fixing
support for escaping of " which would otherwise prematurely end the
string in the middle.
Diffstat (limited to 'src')
-rw-r--r-- | src/logTypes.cpp | 98 | ||||
-rw-r--r-- | src/logTypes.hpp | 8 |
2 files changed, 46 insertions, 60 deletions
diff --git a/src/logTypes.cpp b/src/logTypes.cpp index 42f0979..85c5f4b 100644 --- a/src/logTypes.cpp +++ b/src/logTypes.cpp @@ -4,16 +4,52 @@ namespace scn { scan_expected<typename ContextType::iterator> scanner<WebStat::QuotedString>::scan(WebStat::QuotedString & value, ContextType & ctx) { + static constexpr auto BS_MAP = []() { + std::array<char, 128> map {}; + map['f'] = '\f'; + map['n'] = '\n'; + map['r'] = '\r'; + map['t'] = '\t'; + map['v'] = '\v'; + map['"'] = '"'; + map['\\'] = '\\'; + return map; + }(); + if (auto empty = scn::scan<>(ctx.range(), R"("")")) { return empty->begin(); } - auto result = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")"); - if (!result) { - return unexpected(result.error()); + auto simple = scn::scan<std::string>(ctx.range(), R"("{:[^\"]}")"); + if (simple) { + value = std::move(simple->value()); + return simple->begin(); } - value = result->value(); - return result->begin(); + + if (auto openQuote = scn::scan<>(ctx.range(), R"(")")) { + ctx.advance_to(openQuote->begin()); + while (true) { + if (auto closeQuote = scn::scan<>(ctx.range(), R"(")")) { + return closeQuote->begin(); + } + if (auto plain = scn::scan<std::string>(ctx.range(), R"({:[^\"]})")) { + value.append(plain->value()); + ctx.advance_to(plain->begin()); + } + else if (auto hex = scn::scan<unsigned char>(ctx.range(), R"HEX(\x{:.2x})HEX")) { + value.append(1, static_cast<char>(hex->value())); + ctx.advance_to(hex->begin()); + } + else if (auto escaped = scn::scan<std::string>(ctx.range(), R"ESC(\{:.1[fnrtv"\]})ESC")) { + value.append(1, BS_MAP[static_cast<unsigned char>(escaped->value().front())]); + ctx.advance_to(escaped->begin()); + } + else { + return unexpected(simple.error()); + } + } + } + return unexpected(simple.error()); } scan_expected<typename ContextType::iterator> @@ -32,65 +68,17 @@ namespace scn { if (!result) { return unexpected(result.error()); } - value = result->value(); + value = std::move(result->value()); return result->begin(); } 
scan_expected<typename ContextType::iterator> scanner<WebStat::CLFString>::scan(WebStat::CLFString & value, ContextType & ctx) { - if (auto empty = scn::scan<>(ctx.range(), R"("")")) { - value.emplace(); - return empty->begin(); - } - if (auto null = scn::scan<>(ctx.range(), R"("-")")) { return null->begin(); } - auto result = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")"); - if (!result) { - return unexpected(result.error()); - } - value = result->value(); - decode(*value); - return result->begin(); - } - - void - scanner<WebStat::CLFString>::decode(std::string & value) - { - static constexpr auto BS_MAP = []() { - std::array<char, 128> map {}; - map['f'] = '\f'; - map['n'] = '\n'; - map['r'] = '\r'; - map['t'] = '\t'; - map['v'] = '\v'; - map['"'] = '"'; - map['\\'] = '\\'; - return map; - }(); - - if (auto src = std::ranges::find(value, '\\'); src != value.end()) { - auto dest = src; - while (src != value.cend()) { - if (*src == '\\') { - const std::string_view escaped {++src, value.end()}; - if (auto chr = BS_MAP[static_cast<unsigned char>(*src)]) { - *dest++ = chr; - src++; - } - else if (auto hex = scn::scan<unsigned char>(escaped, R"(x{:.2x})")) { - *dest++ = static_cast<char>(hex->value()); - src += 3; - } - } - else { - *dest++ = *src++; - } - } - value.erase(dest, value.end()); - } + return scn::scanner<WebStat::QuotedString> {}.scan(value.emplace(), ctx); } } diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 7a78cc1..0262060 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -16,9 +16,9 @@ namespace WebStat { bool operator<=>(const QueryString &) const = default; }; - struct CLFString : std::optional<std::string> { - using std::optional<std::string>::optional; - using std::optional<std::string>::operator=; + struct CLFString : std::optional<QuotedString> { + using std::optional<QuotedString>::optional; + using std::optional<QuotedString>::operator=; bool operator<=>(const CLFString &) const = default; }; @@ -49,7 +49,5 @@ namespace scn { 
template<> struct scanner<WebStat::CLFString> : scanner<std::string, char> { static scan_expected<typename ContextType::iterator> scan(WebStat::CLFString & value, ContextType & ctx); - - static void decode(std::string &); }; } |