summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Jamfile.jam 7
-rw-r--r-- src/ingestor.cpp 40
-rw-r--r-- src/ingestor.hpp 23
-rw-r--r-- src/logTypes.cpp 96
-rw-r--r-- src/logTypes.hpp 42
-rw-r--r-- src/webstat_logger_main.cpp 7
6 files changed, 215 insertions, 0 deletions
diff --git a/src/Jamfile.jam b/src/Jamfile.jam
new file mode 100644
index 0000000..40ec28e
--- /dev/null
+++ b/src/Jamfile.jam
@@ -0,0 +1,7 @@
+# Library target "webstat", built from ingestor.cpp and logTypes.cpp.
+# It lists "." as an include directory and the scn target of ../thirdparty as a library.
+# NOTE(review): going by Boost.Build's usual target-rule argument order, the first
+# property list is the build requirements and the list after the empty ": :" slot is the
+# usage requirements passed on to dependents - confirm against the Boost.Build reference.
+lib webstat : ingestor.cpp logTypes.cpp :
+ <include>.
+ <library>../thirdparty//scn
+ : :
+ <include>.
+ ;
+# Executable target "webstat_logger": one source file, linked against the webstat library above.
+exe webstat_logger : webstat_logger_main.cpp : <library>webstat ;
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
new file mode 100644
index 0000000..5724b33
--- /dev/null
+++ b/src/ingestor.cpp
@@ -0,0 +1,40 @@
+#include "ingestor.hpp"
+#include <scn/scan.h>
+#include <syslog.h>
+
+namespace WebStat {
+ // Parses one access-log line into twelve typed fields with a single scn::scan call.
+ // The format string holds one space-separated placeholder per type in the template
+ // argument list, in the same order; the method placeholder only accepts upper-case
+ // ASCII letters ([A-Z]).
+ // Returns the scn result object; ingestLog tests it for truth to tell a parsed line
+ // from a rejected one.
+ // NOTE(review): the std::string_view fields presumably refer into `input`, so they
+ // would only stay valid while the caller's buffer lives - confirm with the scn docs.
+ // NOTE(review): in Apache's mod_log_config %u is the remote user and %r the whole
+ // request line; the examples in the table below suggest %U and %H were meant -
+ // confirm against the LogFormat that is actually deployed.
+ Ingestor::ScanResult
+ Ingestor::scanLogLine(std::string_view input)
+ {
+ return scn::scan< // Field : Apache format specifier : example
+ std::string_view, // virtual_host : %v : some.host.name
+ std::string_view, // remoteip : %a : 1.2.3.4 (or ipv6)
+ uint64_t, // request_time : %{usec}t : 123456790
+ std::string_view, // method : %m : GET
+ QuotedString, // URL : "%u" : "/foo/bar"
+ QueryString, // query_string : "%q" : "?query=string" or ""
+ std::string_view, // protocol : %r : HTTPS/2.0
+ unsigned short, // status : %>s : 200
+ unsigned int, // size : %B : 1234
+ unsigned int, // duration : %D : 1234
+ CLFString, // referrer : "%{Referer}i" : "https://google.com/whatever" or "-"
+ CLFString // user_agent : "%{User-agent}i" : "Chromium v123.4" or "-"
+ >(input, R"({} {} {} {:[A-Z]} {} {} {} {} {} {} {} {})");
+ }
+
+ // Pulls lines from `input` for as long as the line scan succeeds. Each line read is
+ // counted, then handed to scanLogLine: a line that parses bumps linesParsed, one that
+ // does not is reported to syslog at warning level and bumps linesDiscarded.
+ void
+ Ingestor::ingestLog(std::FILE * input)
+ {
+     for (;;) {
+         auto logLine = scn::scan<std::string>(input, "{:[^\n]}\n");
+         if (!logLine) {
+             break;
+         }
+         ++linesRead;
+
+         const auto & text = logLine->value();
+         if (auto parsed = scanLogLine(text)) {
+             ++linesParsed;
+             // The parsed fields are not used for anything yet; discard them explicitly.
+             std::ignore = parsed->values();
+         }
+         else {
+             syslog(LOG_WARNING, "Discarded line: [%s]", text.c_str());
+             ++linesDiscarded;
+         }
+     }
+ }
+}
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
new file mode 100644
index 0000000..97ce9f9
--- /dev/null
+++ b/src/ingestor.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "logTypes.hpp"
+#include <cstdio>
+#include <scn/scan.h>
+
+namespace WebStat {
+ // Reads access-log lines from a stream, parses them, and keeps running totals of how
+ // many lines were read, parsed and discarded.
+ class Ingestor {
+ public:
+ // Result type of scanning one log line: whatever scn::scan returns for these twelve
+ // field types when reading from a std::string_view. The type list has to stay in step
+ // with the one used in the definition of scanLogLine.
+ using ScanResult = decltype(scn::scan<std::string_view, std::string_view, uint64_t, std::string_view,
+ QuotedString, QueryString, std::string_view, unsigned short, unsigned int, unsigned int, CLFString,
+ CLFString>(std::declval<std::string_view>(), ""));
+
+ // Parses a single log line into its fields. Static: it touches none of the counters.
+ [[nodiscard]] static ScanResult scanLogLine(std::string_view);
+
+ // Reads lines from the given stream, parses each one and updates the counters below.
+ void ingestLog(std::FILE *);
+
+ protected:
+ // Running totals maintained by ingestLog.
+ size_t linesRead = 0;
+ size_t linesParsed = 0;
+ size_t linesDiscarded = 0;
+ };
+}
diff --git a/src/logTypes.cpp b/src/logTypes.cpp
new file mode 100644
index 0000000..42f0979
--- /dev/null
+++ b/src/logTypes.cpp
@@ -0,0 +1,96 @@
+#include "logTypes.hpp"
+
+namespace scn {
+ // Scans a double-quoted string from the context's range.
+ //  - `""`    : consumed, `value` is left as it was.
+ //  - `"..."` : the characters between the quotes (none of them a double quote) are
+ //              stored in `value`.
+ // Returns the iterator reported by whichever scan succeeded, or the error of the
+ // second scan when neither form matches.
+ scan_expected<typename ContextType::iterator>
+ scanner<WebStat::QuotedString>::scan(WebStat::QuotedString & value, ContextType & ctx)
+ {
+     if (auto emptyQuotes = scn::scan<>(ctx.range(), R"("")")) {
+         return emptyQuotes->begin();
+     }
+
+     auto quoted = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")");
+     if (quoted) {
+         value = quoted->value();
+         return quoted->begin();
+     }
+     return unexpected(quoted.error());
+ }
+
+ // Scans a quoted query string from the context's range.
+ //  - `""`      : consumed, `value` is left as it was (no query string).
+ //  - `"?"`     : `value` becomes an engaged, empty string.
+ //  - `"?text"` : `value` becomes `text`, i.e. everything after the '?' up to the
+ //                closing quote.
+ // Returns the iterator reported by whichever scan succeeded, or the error of the last
+ // scan when none of the forms matches.
+ scan_expected<typename ContextType::iterator>
+ scanner<WebStat::QueryString>::scan(WebStat::QueryString & value, ContextType & ctx)
+ {
+     if (auto noQuery = scn::scan<>(ctx.range(), R"("")")) {
+         return noQuery->begin();
+     }
+
+     if (auto bareMark = scn::scan<>(ctx.range(), R"("?")")) {
+         value.emplace();
+         return bareMark->begin();
+     }
+
+     auto query = scn::scan<std::string>(ctx.range(), R"("?{:[^"]}")");
+     if (query) {
+         value = query->value();
+         return query->begin();
+     }
+     return unexpected(query.error());
+ }
+
+ // Scans a quoted header-style field from the context's range.
+ //  - `""`    : `value` becomes an engaged, empty string.
+ //  - `"-"`   : consumed, `value` is left as it was (field not present).
+ //  - `"..."` : the characters between the quotes are stored in `value` and then passed
+ //              through decode() to resolve backslash escapes.
+ // Returns the iterator reported by whichever scan succeeded, or the error of the last
+ // scan when none of the forms matches.
+ // NOTE(review): the [^"] set ends at the first double quote, so a field containing an
+ // escaped quote (\") would presumably be cut short there even though decode() knows
+ // that escape - confirm whether such input can occur.
+ scan_expected<typename ContextType::iterator>
+ scanner<WebStat::CLFString>::scan(WebStat::CLFString & value, ContextType & ctx)
+ {
+     if (auto emptyQuotes = scn::scan<>(ctx.range(), R"("")")) {
+         value.emplace();
+         return emptyQuotes->begin();
+     }
+
+     if (auto dash = scn::scan<>(ctx.range(), R"("-")")) {
+         return dash->begin();
+     }
+
+     auto quoted = scn::scan<std::string>(ctx.range(), R"("{:[^"]}")");
+     if (quoted) {
+         value = quoted->value();
+         decode(*value);
+         return quoted->begin();
+     }
+     return unexpected(quoted.error());
+ }
+
+ // Resolves backslash escapes in `value`, in place.
+ // Recognised escapes: \f \n \r \t \v \" \\ and \xHH, where HH is exactly two hex
+ // digits (either case). For any other sequence - including a backslash that ends the
+ // string - only the backslash is dropped and whatever follows is kept verbatim.
+ // A string without any backslash is left untouched.
+ void
+ scanner<WebStat::CLFString>::decode(std::string & value)
+ {
+     // Escape letter -> decoded character; zero means "not a single-letter escape".
+     static constexpr auto BS_MAP = []() {
+         std::array<char, 128> map {};
+         map['f'] = '\f';
+         map['n'] = '\n';
+         map['r'] = '\r';
+         map['t'] = '\t';
+         map['v'] = '\v';
+         map['"'] = '"';
+         map['\\'] = '\\';
+         return map;
+     }();
+     // Numeric value of one hex digit, or -1 when `chr` is not a hex digit.
+     static constexpr auto hexDigit = [](const char chr) -> int {
+         if (chr >= '0' && chr <= '9') {
+             return chr - '0';
+         }
+         if (chr >= 'a' && chr <= 'f') {
+             return chr - 'a' + 10;
+         }
+         if (chr >= 'A' && chr <= 'F') {
+             return chr - 'A' + 10;
+         }
+         return -1;
+     };
+
+     auto src = std::ranges::find(value, '\\');
+     if (src == value.end()) {
+         return;
+     }
+
+     // Everything before the first backslash is already in place; compact the rest.
+     auto dest = src;
+     while (src != value.end()) {
+         if (*src != '\\') {
+             *dest++ = *src++;
+             continue;
+         }
+         if (++src == value.end()) {
+             break; // trailing backslash: nothing follows it, so never look past the end
+         }
+
+         // Bytes >= 0x80 are outside BS_MAP; treat them as "no single-letter escape".
+         const auto code = static_cast<unsigned char>(*src);
+         if (const char chr = code < BS_MAP.size() ? BS_MAP[code] : '\0') {
+             *dest++ = chr;
+             ++src;
+         }
+         else if (code == 'x' && value.end() - src >= 3) {
+             // Only consume the three characters "xHH" when both digits are valid.
+             const int high = hexDigit(src[1]);
+             const int low = hexDigit(src[2]);
+             if (high >= 0 && low >= 0) {
+                 *dest++ = static_cast<char>(high * 16 + low);
+                 src += 3;
+             }
+         }
+         // Unrecognised escape: the backslash is gone and the next pass copies *src as is.
+     }
+     value.erase(dest, value.end());
+ }
+}
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
new file mode 100644
index 0000000..d4f1b7b
--- /dev/null
+++ b/src/logTypes.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <optional>
+#include <scn/scan.h>
+#include <string>
+
+namespace WebStat {
+ // A std::string under its own type name, so that scanning code can treat it
+ // differently from a plain std::string. Constructors and assignment operators are
+ // inherited from std::string unchanged.
+ struct QuotedString : std::string {
+ using std::string::string;
+ using std::string::operator=;
+ };
+
+ // An optional string under its own type name, used for the query-string field of a log
+ // line. Constructors and assignment operators are inherited from std::optional.
+ struct QueryString : std::optional<std::string> {
+     using std::optional<std::string>::optional;
+     using std::optional<std::string>::operator=;
+     // The return type must be `auto` (or a comparison category): a defaulted operator<=>
+     // declared to return `bool` cannot be synthesised and ends up deleted, leaving only
+     // the implicitly declared operator==. With `auto` both equality and ordering work.
+     auto operator<=>(const QueryString &) const = default;
+ };
+
+ // An optional string under its own type name, used for the quoted header fields
+ // (referrer, user agent) of a log line. Constructors and assignment operators are
+ // inherited from std::optional.
+ struct CLFString : std::optional<std::string> {
+     using std::optional<std::string>::optional;
+     using std::optional<std::string>::operator=;
+     // The return type must be `auto` (or a comparison category): a defaulted operator<=>
+     // declared to return `bool` cannot be synthesised and ends up deleted, leaving only
+     // the implicitly declared operator==. With `auto` both equality and ordering work.
+     auto operator<=>(const CLFString &) const = default;
+ };
+}
+
+namespace scn {
+ // Scan context type taken by the custom scanner specialisations in this header.
+ // NOTE(review): this names scn's `v4` namespace and a `detail` tag directly, so it is
+ // presumably tied to that library version - confirm when upgrading scn.
+ using ContextType = scn::v4::basic_scan_context<scn::v4::detail::buffer_range_tag, char>;
+
+ // scn scanner for WebStat::QuotedString. Deriving from the std::string scanner
+ // presumably reuses its format-spec handling (confirm with the scn docs); only scan()
+ // is replaced here.
+ template<> struct scanner<WebStat::QuotedString> : scanner<std::string, char> {
+ // Reads the next value from `ctx` into `value`; returns an iterator on success or a
+ // scan error otherwise.
+ static scan_expected<typename ContextType::iterator> scan(WebStat::QuotedString & value, ContextType & ctx);
+ };
+
+ // scn scanner for WebStat::QueryString. Deriving from the std::string scanner
+ // presumably reuses its format-spec handling (confirm with the scn docs); only scan()
+ // is replaced here.
+ template<> struct scanner<WebStat::QueryString> : scanner<std::string, char> {
+ // Reads the next value from `ctx` into `value`; returns an iterator on success or a
+ // scan error otherwise.
+ static scan_expected<typename ContextType::iterator> scan(WebStat::QueryString & value, ContextType & ctx);
+ };
+
+ // scn scanner for WebStat::CLFString. Deriving from the std::string scanner
+ // presumably reuses its format-spec handling (confirm with the scn docs); scan() is
+ // replaced and a decode() helper is added.
+ template<> struct scanner<WebStat::CLFString> : scanner<std::string, char> {
+ // Reads the next value from `ctx` into `value`; returns an iterator on success or a
+ // scan error otherwise.
+ static scan_expected<typename ContextType::iterator> scan(WebStat::CLFString & value, ContextType & ctx);
+
+ // Resolves backslash escape sequences in the given string, modifying it in place.
+ static void decode(std::string &);
+ };
+}
diff --git a/src/webstat_logger_main.cpp b/src/webstat_logger_main.cpp
new file mode 100644
index 0000000..c4d31d6
--- /dev/null
+++ b/src/webstat_logger_main.cpp
@@ -0,0 +1,7 @@
+#include "ingestor.hpp"
+
+// Program entry point: feeds standard input through a default-constructed Ingestor.
+// Command-line arguments are ignored.
+int
+main(int, char **)
+{
+    WebStat::Ingestor ingestor;
+    ingestor.ingestLog(stdin);
+}