From 71803b97f1c9e31f2027da48bb742353f9c43e62 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Tue, 23 Sep 2025 13:31:26 +0100 Subject: Write unparsable lines to the entity table Diagnostics and the ability to ingest later. --- src/ingestor.cpp | 13 ++++++++++--- src/ingestor.hpp | 1 + src/logTypes.hpp | 1 + src/schema.sql | 2 +- test/test-ingest.cpp | 36 +++++++++++++++++++++++++++++++++++- 5 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/ingestor.cpp b/src/ingestor.cpp index e1cc7c6..db5f317 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -121,7 +121,7 @@ namespace WebStat { if (logIn.revents) { if (auto line = scn::scan(input, "{:[^\n]}\n")) { linesRead++; - ingestLogLine(dbpool->get().get(), line->value()); + ingestLogLine(line->value()); } else { break; @@ -136,6 +136,12 @@ namespace WebStat { } } + void + Ingestor::ingestLogLine(const std::string_view line) + { + ingestLogLine(dbpool->get().get(), line); + } + void Ingestor::ingestLogLine(DB::Connection * dbconn, const std::string_view line) { @@ -150,8 +156,9 @@ namespace WebStat { storeLogLine(dbconn, values); } else { - syslog(LOG_WARNING, "Discarded line: [%.*s]", static_cast(line.length()), line.data()); linesDiscarded++; + const auto unparsableLine = toEntity(line, EntityType::UnparsableLine); + storeEntities(dbconn, {unparsableLine}); } } @@ -185,7 +192,7 @@ namespace WebStat { Ingestor::storeEntities(DB::Connection * dbconn, const std::span> values) const { static constexpr std::array ENTITY_TYPE_VALUES { - "host", "virtual_host", "path", "query_string", "referrer", "user_agent"}; + "host", "virtual_host", "path", "query_string", "referrer", "user_agent", "unparsable_line"}; auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS); std::ranges::for_each( diff --git a/src/ingestor.hpp b/src/ingestor.hpp index ffdcc9a..879526d 100644 --- a/src/ingestor.hpp +++ b/src/ingestor.hpp @@ -27,6 +27,7 @@ namespace WebStat { [[nodiscard]] static ScanResult scanLogLine(std::string_view); void ingestLog(std::FILE *); + void ingestLogLine(std::string_view); void ingestLogLine(DB::Connection *, std::string_view); template void storeLogLine(DB::Connection *, const std::tuple &) const; diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 687f43b..7a78cc1 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -29,6 +29,7 @@ namespace WebStat { QueryString, Referrer, UserAgent, + UnparsableLine, }; using Crc32Value = uint32_t; diff --git a/src/schema.sql b/src/schema.sql index 3c6285b..4f3b205 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -1,6 +1,6 @@ CREATE TYPE http_verb AS ENUM('GET', 'HEAD', 'OPTIONS', 'TRACE', 'PUT', 'DELETE', 'POST', 'PATCH', 'CONNECT'); CREATE TYPE protocol AS ENUM('HTTP/1.0', 'HTTP/1.1', 'HTTP/1.2', 'HTTP/1.3', 'HTTP/2.0', 'HTTPS/3.0'); -CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent'); +CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent', 'unparsable_line'); CREATE TABLE entities ( id bigint NOT NULL, diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index 1ac6167..722763a 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -3,6 +3,7 @@ #include #include "test-util.hpp" +#include #include #include @@ -29,10 +30,31 @@ namespace std { { return std::apply( [&strm](auto &&... elems) -> decltype(auto) { - return ((strm << elems << '\n'), ...); + return ((strm << '{' << elems << ", "), ...) << '}'; }, values); } + + template + ostream & + operator<<(ostream & strm, const DB::Row & row) + { + return [&](std::integer_sequence) -> decltype(auto) { + return ((strm << '{' << row.template get() << ", "), ...) << '}'; + }(std::make_integer_sequence()); + } +} + +namespace DB { + template + bool + operator!=(const Row & row, const std::tuple & expected) + { + return [&](std::integer_sequence) { + return std::make_tuple(row.template get()...); + }(std::make_integer_sequence()) + != expected; + } } BOOST_DATA_TEST_CASE(QuotedStringsGood, @@ -222,6 +244,18 @@ BOOST_AUTO_TEST_CASE(FetchMockUserAgentDetail) } } +BOOST_AUTO_TEST_CASE(DiscardUnparsable) +{ + BOOST_REQUIRE_NO_THROW(ingestLogLine("does not parse")); + auto dbconn = dbpool->get(); + auto select = dbconn->select("SELECT id, value FROM entities WHERE type = 'unparsable_line'"); + constexpr std::array, 1> EXPECTED {{ + {1664299262, "does not parse"}, + }}; + auto rows = select->as(); + BOOST_CHECK_EQUAL_COLLECTIONS(rows.begin(), rows.end(), EXPECTED.begin(), EXPECTED.end()); +} + BOOST_AUTO_TEST_SUITE_END(); BOOST_AUTO_TEST_CASE(FetchRealUserAgentDetail, *boost::unit_test::disabled()) -- cgit v1.2.3