summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/logTypes.cpp98
-rw-r--r--src/logTypes.hpp8
-rw-r--r--test/test-ingest.cpp40
3 files changed, 68 insertions, 78 deletions
diff --git a/src/logTypes.cpp b/src/logTypes.cpp
index 42f0979..85c5f4b 100644
--- a/src/logTypes.cpp
+++ b/src/logTypes.cpp
@@ -4,16 +4,52 @@ namespace scn {
scan_expected<typename ContextType::iterator>
scanner<WebStat::QuotedString>::scan(WebStat::QuotedString & value, ContextType & ctx)
{
+ static constexpr auto BS_MAP = []() {
+ std::array<char, 128> map {};
+ map['f'] = '\f';
+ map['n'] = '\n';
+ map['r'] = '\r';
+ map['t'] = '\t';
+ map['v'] = '\v';
+ map['"'] = '"';
+ map['\\'] = '\\';
+ return map;
+ }();
+
if (auto empty = scn::scan<>(ctx.range(), R"("")")) {
return empty->begin();
}
- auto result = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")");
- if (!result) {
- return unexpected(result.error());
+ auto simple = scn::scan<std::string>(ctx.range(), R"("{:[^\"]}")");
+ if (simple) {
+ value = std::move(simple->value());
+ return simple->begin();
}
- value = result->value();
- return result->begin();
+
+ if (auto openQuote = scn::scan<>(ctx.range(), R"(")")) {
+ ctx.advance_to(openQuote->begin());
+ while (true) {
+ if (auto closeQuote = scn::scan<>(ctx.range(), R"(")")) {
+ return closeQuote->begin();
+ }
+ if (auto plain = scn::scan<std::string>(ctx.range(), R"({:[^\"]})")) {
+ value.append(plain->value());
+ ctx.advance_to(plain->begin());
+ }
+ else if (auto hex = scn::scan<unsigned char>(ctx.range(), R"HEX(\x{:.2x})HEX")) {
+ value.append(1, static_cast<char>(hex->value()));
+ ctx.advance_to(hex->begin());
+ }
+ else if (auto escaped = scn::scan<std::string>(ctx.range(), R"ESC(\{:.1[fnrtv"\]})ESC")) {
+ value.append(1, BS_MAP[static_cast<unsigned char>(escaped->value().front())]);
+ ctx.advance_to(escaped->begin());
+ }
+ else {
+ return unexpected(simple.error());
+ }
+ }
+ }
+ return unexpected(simple.error());
}
scan_expected<typename ContextType::iterator>
@@ -32,65 +68,17 @@ namespace scn {
if (!result) {
return unexpected(result.error());
}
- value = result->value();
+ value = std::move(result->value());
return result->begin();
}
scan_expected<typename ContextType::iterator>
scanner<WebStat::CLFString>::scan(WebStat::CLFString & value, ContextType & ctx)
{
- if (auto empty = scn::scan<>(ctx.range(), R"("")")) {
- value.emplace();
- return empty->begin();
- }
-
if (auto null = scn::scan<>(ctx.range(), R"("-")")) {
return null->begin();
}
- auto result = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")");
- if (!result) {
- return unexpected(result.error());
- }
- value = result->value();
- decode(*value);
- return result->begin();
- }
-
- void
- scanner<WebStat::CLFString>::decode(std::string & value)
- {
- static constexpr auto BS_MAP = []() {
- std::array<char, 128> map {};
- map['f'] = '\f';
- map['n'] = '\n';
- map['r'] = '\r';
- map['t'] = '\t';
- map['v'] = '\v';
- map['"'] = '"';
- map['\\'] = '\\';
- return map;
- }();
-
- if (auto src = std::ranges::find(value, '\\'); src != value.end()) {
- auto dest = src;
- while (src != value.cend()) {
- if (*src == '\\') {
- const std::string_view escaped {++src, value.end()};
- if (auto chr = BS_MAP[static_cast<unsigned char>(*src)]) {
- *dest++ = chr;
- src++;
- }
- else if (auto hex = scn::scan<unsigned char>(escaped, R"(x{:.2x})")) {
- *dest++ = static_cast<char>(hex->value());
- src += 3;
- }
- }
- else {
- *dest++ = *src++;
- }
- }
- value.erase(dest, value.end());
- }
+ return scn::scanner<WebStat::QuotedString> {}.scan(value.emplace(), ctx);
}
}
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index 7a78cc1..0262060 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -16,9 +16,9 @@ namespace WebStat {
bool operator<=>(const QueryString &) const = default;
};
- struct CLFString : std::optional<std::string> {
- using std::optional<std::string>::optional;
- using std::optional<std::string>::operator=;
+ struct CLFString : std::optional<QuotedString> {
+ using std::optional<QuotedString>::optional;
+ using std::optional<QuotedString>::operator=;
bool operator<=>(const CLFString &) const = default;
};
@@ -49,7 +49,5 @@ namespace scn {
template<> struct scanner<WebStat::CLFString> : scanner<std::string, char> {
static scan_expected<typename ContextType::iterator> scan(WebStat::CLFString & value, ContextType & ctx);
-
- static void decode(std::string &);
};
}
diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp
index 388c440..9e401b3 100644
--- a/test/test-ingest.cpp
+++ b/test/test-ingest.cpp
@@ -63,6 +63,12 @@ BOOST_DATA_TEST_CASE(QuotedStringsGood,
{R"("-")", "-"},
{R"(".")", "."},
{R"("/url/path")", "/url/path"},
+ {R"("hex\x41")", "hexA"},
+ {R"("hex\x4141")", "hexA41"},
+ {R"("hex\x41\x41")", "hexAA"},
+ {R"("hex\t\x41")", "hex\tA"},
+ {R"("/packages/dev-php/pecl-uploadprogress(),')\"((,,")",
+ R"LOG(/packages/dev-php/pecl-uploadprogress(),')"((,,)LOG"},
}),
input, expected)
{
@@ -114,24 +120,6 @@ BOOST_DATA_TEST_CASE(QueryStringsBad,
BOOST_TEST_DECORATOR(*boost::unit_test::timeout(1))
-BOOST_DATA_TEST_CASE(CLFStringsDecode,
- boost::unit_test::data::make<WebStat::ParseData<std::string>>({
- {"", ""},
- {"plain", "plain"},
- {R"(hex\x41)", "hexA"},
- {R"(hex\x4141)", "hexA41"},
- {R"(hex\x41\x41)", "hexAA"},
- {R"(hex\t\x41)", "hex\tA"},
- }),
- input, expected)
-{
- std::string value {input};
- scn::scanner<WebStat::CLFString>::decode(value);
- BOOST_CHECK_EQUAL(value, expected);
-}
-
-BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("CLFStringsDecode"))
-
BOOST_DATA_TEST_CASE(CLFStringsGood,
boost::unit_test::data::make<WebStat::ParseData<WebStat::CLFString>>({
{R"("")", ""},
@@ -192,6 +180,22 @@ BOOST_DATA_TEST_CASE(ExtractFields,
BOOST_CHECK_EQUAL(result->values(), expected);
}
+BOOST_AUTO_TEST_CASE(ExtractFieldsEdgeCasesUnparsable3580673700)
+{
+ const auto result = WebStat::Ingestor::scanLogLine(
+ R"LOG(gentoobrowse.randomdan.homeip.net 5.183.129.58 1759960912510520 GET "/packages/dev-php/pecl-uploadprogress(),')\"((,," "" HTTP/1.1 404 0 10051 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")LOG");
+ BOOST_REQUIRE(result);
+ BOOST_CHECK_EQUAL(std::get<4>(result->values()), R"LOG(/packages/dev-php/pecl-uploadprogress(),')"((,,)LOG");
+}
+
+BOOST_AUTO_TEST_CASE(ExtractFieldsEdgeCasesUnparsable3603068405)
+{
+ const auto result = WebStat::Ingestor::scanLogLine(
+ R"LOG(gentoobrowse.randomdan.homeip.net 5.183.129.58 1759960912705682 GET "/packages/dev-php/pecl-uploadprogress'yqFSRA<'\">yuezhx" "" HTTP/1.1 404 0 19143 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")LOG");
+ BOOST_REQUIRE(result);
+ BOOST_CHECK_EQUAL(std::get<4>(result->values()), R"LOG(/packages/dev-php/pecl-uploadprogress'yqFSRA<'">yuezhx)LOG");
+}
+
class TestIngestor : public WebStat::Ingestor {
public:
TestIngestor() :