summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Dan Goodliffe <dan.goodliffe@octal.co.uk> 2025-10-09 14:26:40 +0100
committer Dan Goodliffe <dan.goodliffe@octal.co.uk> 2025-10-09 14:26:40 +0100
commit a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3 (patch)
tree c3a2f2f7dfc7c0e5d63964a0482a78d2aa13306a /src
parent bc05a99df780313ba3cc298323e98beaa5b0ba54 (diff)
downloadwebstat-a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3.tar.bz2
webstat-a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3.tar.xz
webstat-a7a082d8d471bc38f9aa1160b0e9f8daf3d35ba3.zip
Fix up QuotedString/CLFString parsing
Refactors CLFString in terms of QuotedString, but with the option of being null (nullopt). Moves the whole decode function into QuotedString's parser, fixing support for an escaped " which would otherwise prematurely end the string in the middle.
Diffstat (limited to 'src')
-rw-r--r-- src/logTypes.cpp 98
-rw-r--r-- src/logTypes.hpp 8
2 files changed, 46 insertions, 60 deletions
diff --git a/src/logTypes.cpp b/src/logTypes.cpp
index 42f0979..85c5f4b 100644
--- a/src/logTypes.cpp
+++ b/src/logTypes.cpp
@@ -4,16 +4,52 @@ namespace scn {
scan_expected<typename ContextType::iterator>
scanner<WebStat::QuotedString>::scan(WebStat::QuotedString & value, ContextType & ctx)
{
+ static constexpr auto BS_MAP = []() {
+ std::array<char, 128> map {};
+ map['f'] = '\f';
+ map['n'] = '\n';
+ map['r'] = '\r';
+ map['t'] = '\t';
+ map['v'] = '\v';
+ map['"'] = '"';
+ map['\\'] = '\\';
+ return map;
+ }();
+
if (auto empty = scn::scan<>(ctx.range(), R"("")")) {
return empty->begin();
}
- auto result = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")");
- if (!result) {
- return unexpected(result.error());
+ auto simple = scn::scan<std::string>(ctx.range(), R"("{:[^\"]}")");
+ if (simple) {
+ value = std::move(simple->value());
+ return simple->begin();
}
- value = result->value();
- return result->begin();
+
+ if (auto openQuote = scn::scan<>(ctx.range(), R"(")")) {
+ ctx.advance_to(openQuote->begin());
+ while (true) {
+ if (auto closeQuote = scn::scan<>(ctx.range(), R"(")")) {
+ return closeQuote->begin();
+ }
+ if (auto plain = scn::scan<std::string>(ctx.range(), R"({:[^\"]})")) {
+ value.append(plain->value());
+ ctx.advance_to(plain->begin());
+ }
+ else if (auto hex = scn::scan<unsigned char>(ctx.range(), R"HEX(\x{:.2x})HEX")) {
+ value.append(1, static_cast<char>(hex->value()));
+ ctx.advance_to(hex->begin());
+ }
+ else if (auto escaped = scn::scan<std::string>(ctx.range(), R"ESC(\{:.1[fnrtv"\]})ESC")) {
+ value.append(1, BS_MAP[static_cast<unsigned char>(escaped->value().front())]);
+ ctx.advance_to(escaped->begin());
+ }
+ else {
+ return unexpected(simple.error());
+ }
+ }
+ }
+ return unexpected(simple.error());
}
scan_expected<typename ContextType::iterator>
@@ -32,65 +68,17 @@ namespace scn {
if (!result) {
return unexpected(result.error());
}
- value = result->value();
+ value = std::move(result->value());
return result->begin();
}
scan_expected<typename ContextType::iterator>
scanner<WebStat::CLFString>::scan(WebStat::CLFString & value, ContextType & ctx)
{
- if (auto empty = scn::scan<>(ctx.range(), R"("")")) {
- value.emplace();
- return empty->begin();
- }
-
if (auto null = scn::scan<>(ctx.range(), R"("-")")) {
return null->begin();
}
- auto result = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")");
- if (!result) {
- return unexpected(result.error());
- }
- value = result->value();
- decode(*value);
- return result->begin();
- }
-
- void
- scanner<WebStat::CLFString>::decode(std::string & value)
- {
- static constexpr auto BS_MAP = []() {
- std::array<char, 128> map {};
- map['f'] = '\f';
- map['n'] = '\n';
- map['r'] = '\r';
- map['t'] = '\t';
- map['v'] = '\v';
- map['"'] = '"';
- map['\\'] = '\\';
- return map;
- }();
-
- if (auto src = std::ranges::find(value, '\\'); src != value.end()) {
- auto dest = src;
- while (src != value.cend()) {
- if (*src == '\\') {
- const std::string_view escaped {++src, value.end()};
- if (auto chr = BS_MAP[static_cast<unsigned char>(*src)]) {
- *dest++ = chr;
- src++;
- }
- else if (auto hex = scn::scan<unsigned char>(escaped, R"(x{:.2x})")) {
- *dest++ = static_cast<char>(hex->value());
- src += 3;
- }
- }
- else {
- *dest++ = *src++;
- }
- }
- value.erase(dest, value.end());
- }
+ return scn::scanner<WebStat::QuotedString> {}.scan(value.emplace(), ctx);
}
}
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index 7a78cc1..0262060 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -16,9 +16,9 @@ namespace WebStat {
bool operator<=>(const QueryString &) const = default;
};
- struct CLFString : std::optional<std::string> {
- using std::optional<std::string>::optional;
- using std::optional<std::string>::operator=;
+ struct CLFString : std::optional<QuotedString> {
+ using std::optional<QuotedString>::optional;
+ using std::optional<QuotedString>::operator=;
bool operator<=>(const CLFString &) const = default;
};
@@ -49,7 +49,5 @@ namespace scn {
template<> struct scanner<WebStat::CLFString> : scanner<std::string, char> {
static scan_expected<typename ContextType::iterator> scan(WebStat::CLFString & value, ContextType & ctx);
-
- static void decode(std::string &);
};
}