diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-08-21 20:39:52 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-08-25 16:00:59 +0100 |
commit | b2416925f8845b70ed25fb4ec7cde8ef11e8c239 (patch) | |
tree | 9ed898937ddceca6bcf0e2a6d6dfda3754dceefe /src/ingestor.cpp | |
download | webstat-b2416925f8845b70ed25fb4ec7cde8ef11e8c239.tar.bz2 webstat-b2416925f8845b70ed25fb4ec7cde8ef11e8c239.tar.xz webstat-b2416925f8845b70ed25fb4ec7cde8ef11e8c239.zip |
Initial commit; basic Apache log parsing
Diffstat (limited to 'src/ingestor.cpp')
-rw-r--r-- | src/ingestor.cpp | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp new file mode 100644 index 0000000..5724b33 --- /dev/null +++ b/src/ingestor.cpp @@ -0,0 +1,40 @@ +#include "ingestor.hpp" +#include <scn/scan.h> +#include <syslog.h> + +namespace WebStat { + Ingestor::ScanResult + Ingestor::scanLogLine(std::string_view input) + { + return scn::scan< // Field : Apache format specifier : example + std::string_view, // virtual_host : %v : some.host.name + std::string_view, // remoteip : %a : 1.2.3.4 (or ipv6) + uint64_t, // request_time : %{usec}t : 123456790 + std::string_view, // method : %m : GET + QuotedString, // URL : "%u" : "/foo/bar" + QueryString, // query_string : "%q" : "?query=string" or "" + std::string_view, // protocol : %r : HTTPS/2.0 + unsigned short, // status : %>s : 200 + unsigned int, // size : %B : 1234 + unsigned int, // duration : %D : 1234 + CLFString, // referrer : "%{Referer}i" : "https://google.com/whatever" or "-" + CLFString // user_agent : "%{User-agent}i" : "Chromium v123.4" or "-" + >(input, R"({} {} {} {:[A-Z]} {} {} {} {} {} {} {} {})"); + } + + void + Ingestor::ingestLog(std::FILE * input) + { + while (auto line = scn::scan<std::string>(input, "{:[^\n]}\n")) { + linesRead++; + if (auto result = scanLogLine(line->value())) { + linesParsed++; + std::ignore = result->values(); + } + else { + syslog(LOG_WARNING, "Discarded line: [%s]", line->value().c_str()); + linesDiscarded++; + } + } + } +} |