summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Dan Goodliffe <dan.goodliffe@octal.co.uk>, 2025-09-23 13:31:26 +0100
committer: Dan Goodliffe <dan.goodliffe@octal.co.uk>, 2025-09-23 13:31:26 +0100
commit: 71803b97f1c9e31f2027da48bb742353f9c43e62 (patch)
tree: 99f418aa8cf4d4dd9edd2de1985295c48b4ba904 /src
parent: 4785b4d8c5bac4e03ed8dd2a4c01ec098da60d7b (diff)
download: webstat-71803b97f1c9e31f2027da48bb742353f9c43e62.tar.bz2
webstat-71803b97f1c9e31f2027da48bb742353f9c43e62.tar.xz
webstat-71803b97f1c9e31f2027da48bb742353f9c43e62.zip
Write unparsable lines to the entity table
This keeps them available for diagnostics and makes it possible to ingest them later.
Diffstat (limited to 'src')
-rw-r--r-- src/ingestor.cpp 13
-rw-r--r-- src/ingestor.hpp 1
-rw-r--r-- src/logTypes.hpp 1
-rw-r--r-- src/schema.sql 2
4 files changed, 13 insertions, 4 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index e1cc7c6..db5f317 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -121,7 +121,7 @@ namespace WebStat {
if (logIn.revents) {
if (auto line = scn::scan<std::string>(input, "{:[^\n]}\n")) {
linesRead++;
- ingestLogLine(dbpool->get().get(), line->value());
+ ingestLogLine(line->value());
}
else {
break;
@@ -137,6 +137,12 @@ namespace WebStat {
}
void
+ Ingestor::ingestLogLine(const std::string_view line)
+ {
+ ingestLogLine(dbpool->get().get(), line);
+ }
+
+ void
Ingestor::ingestLogLine(DB::Connection * dbconn, const std::string_view line)
{
if (auto result = scanLogLine(line)) {
@@ -150,8 +156,9 @@ namespace WebStat {
storeLogLine(dbconn, values);
}
else {
- syslog(LOG_WARNING, "Discarded line: [%.*s]", static_cast<int>(line.length()), line.data());
linesDiscarded++;
+ const auto unparsableLine = toEntity(line, EntityType::UnparsableLine);
+ storeEntities(dbconn, {unparsableLine});
}
}
@@ -185,7 +192,7 @@ namespace WebStat {
Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const
{
static constexpr std::array ENTITY_TYPE_VALUES {
- "host", "virtual_host", "path", "query_string", "referrer", "user_agent"};
+ "host", "virtual_host", "path", "query_string", "referrer", "user_agent", "unparsable_line"};
auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS);
std::ranges::for_each(
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index ffdcc9a..879526d 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -27,6 +27,7 @@ namespace WebStat {
[[nodiscard]] static ScanResult scanLogLine(std::string_view);
void ingestLog(std::FILE *);
+ void ingestLogLine(std::string_view);
void ingestLogLine(DB::Connection *, std::string_view);
template<typename... T> void storeLogLine(DB::Connection *, const std::tuple<T...> &) const;
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index 687f43b..7a78cc1 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -29,6 +29,7 @@ namespace WebStat {
QueryString,
Referrer,
UserAgent,
+ UnparsableLine,
};
using Crc32Value = uint32_t;
diff --git a/src/schema.sql b/src/schema.sql
index 3c6285b..4f3b205 100644
--- a/src/schema.sql
+++ b/src/schema.sql
@@ -1,6 +1,6 @@
CREATE TYPE http_verb AS ENUM('GET', 'HEAD', 'OPTIONS', 'TRACE', 'PUT', 'DELETE', 'POST', 'PATCH', 'CONNECT');
CREATE TYPE protocol AS ENUM('HTTP/1.0', 'HTTP/1.1', 'HTTP/1.2', 'HTTP/1.3', 'HTTP/2.0', 'HTTPS/3.0');
-CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent');
+CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent', 'unparsable_line');
CREATE TABLE entities (
id bigint NOT NULL,