From b2416925f8845b70ed25fb4ec7cde8ef11e8c239 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Thu, 21 Aug 2025 20:39:52 +0100 Subject: Initial commit; basic Apache log parsing --- src/logTypes.cpp | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 src/logTypes.cpp (limited to 'src/logTypes.cpp') diff --git a/src/logTypes.cpp b/src/logTypes.cpp new file mode 100644 index 0000000..42f0979 --- /dev/null +++ b/src/logTypes.cpp @@ -0,0 +1,96 @@ +#include "logTypes.hpp" + +namespace scn { + scan_expected + scanner::scan(WebStat::QuotedString & value, ContextType & ctx) + { + if (auto empty = scn::scan<>(ctx.range(), R"("")")) { + return empty->begin(); + } + + auto result = scn::scan(ctx.range(), R"("{:[^"]}")"); + if (!result) { + return unexpected(result.error()); + } + value = result->value(); + return result->begin(); + } + + scan_expected + scanner::scan(WebStat::QueryString & value, ContextType & ctx) + { + if (auto null = scn::scan<>(ctx.range(), R"("")")) { + return null->begin(); + } + + if (auto empty = scn::scan<>(ctx.range(), R"("?")")) { + value.emplace(); + return empty->begin(); + } + + auto result = scn::scan(ctx.range(), R"("?{:[^"]}")"); + if (!result) { + return unexpected(result.error()); + } + value = result->value(); + return result->begin(); + } + + scan_expected + scanner::scan(WebStat::CLFString & value, ContextType & ctx) + { + if (auto empty = scn::scan<>(ctx.range(), R"("")")) { + value.emplace(); + return empty->begin(); + } + + if (auto null = scn::scan<>(ctx.range(), R"("-")")) { + return null->begin(); + } + + auto result = scn::scan(ctx.range(), R"("{:[^"]}")"); + if (!result) { + return unexpected(result.error()); + } + value = result->value(); + decode(*value); + return result->begin(); + } + + void + scanner::decode(std::string & value) + { + static constexpr auto BS_MAP = []() { + std::array map {}; + map['f'] = '\f'; + map['n'] = '\n'; + map['r'] = '\r'; + map['t'] = '\t'; + map['v'] = '\v'; + map['"'] = '"'; + map['\\'] = '\\'; + return map; + }(); + + if (auto src = std::ranges::find(value, '\\'); src != value.end()) { + auto dest = src; + while (src != value.cend()) { + if (*src == '\\') { + const std::string_view escaped {++src, value.end()}; + if (auto chr = BS_MAP[static_cast(*src)]) { + *dest++ = chr; + src++; + } + else if (auto hex = scn::scan(escaped, R"(x{:.2x})")) { + *dest++ = static_cast(hex->value()); + src += 3; + } + } + else { + *dest++ = *src++; + } + } + value.erase(dest, value.end()); + } + } +} -- cgit v1.2.3