diff options
-rw-r--r-- | src/ingestor.cpp | 13 | ||||
-rw-r--r-- | src/ingestor.hpp | 1 | ||||
-rw-r--r-- | src/logTypes.hpp | 1 | ||||
-rw-r--r-- | src/schema.sql | 2 | ||||
-rw-r--r-- | test/test-ingest.cpp | 36 |
5 files changed, 48 insertions, 5 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp index e1cc7c6..db5f317 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -121,7 +121,7 @@ namespace WebStat { if (logIn.revents) { if (auto line = scn::scan<std::string>(input, "{:[^\n]}\n")) { linesRead++; - ingestLogLine(dbpool->get().get(), line->value()); + ingestLogLine(line->value()); } else { break; @@ -137,6 +137,12 @@ namespace WebStat { } void + Ingestor::ingestLogLine(const std::string_view line) + { + ingestLogLine(dbpool->get().get(), line); + } + + void Ingestor::ingestLogLine(DB::Connection * dbconn, const std::string_view line) { if (auto result = scanLogLine(line)) { @@ -150,8 +156,9 @@ namespace WebStat { storeLogLine(dbconn, values); } else { - syslog(LOG_WARNING, "Discarded line: [%.*s]", static_cast<int>(line.length()), line.data()); linesDiscarded++; + const auto unparsableLine = toEntity(line, EntityType::UnparsableLine); + storeEntities(dbconn, {unparsableLine}); } } @@ -185,7 +192,7 @@ namespace WebStat { Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const { static constexpr std::array ENTITY_TYPE_VALUES { - "host", "virtual_host", "path", "query_string", "referrer", "user_agent"}; + "host", "virtual_host", "path", "query_string", "referrer", "user_agent", "unparsable_line"}; auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS); std::ranges::for_each( diff --git a/src/ingestor.hpp b/src/ingestor.hpp index ffdcc9a..879526d 100644 --- a/src/ingestor.hpp +++ b/src/ingestor.hpp @@ -27,6 +27,7 @@ namespace WebStat { [[nodiscard]] static ScanResult scanLogLine(std::string_view); void ingestLog(std::FILE *); + void ingestLogLine(std::string_view); void ingestLogLine(DB::Connection *, std::string_view); template<typename... T> void storeLogLine(DB::Connection *, const std::tuple<T...> &) const; diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 687f43b..7a78cc1 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -29,6 +29,7 @@ namespace WebStat { QueryString, Referrer, UserAgent, + UnparsableLine, }; using Crc32Value = uint32_t; diff --git a/src/schema.sql b/src/schema.sql index 3c6285b..4f3b205 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -1,6 +1,6 @@ CREATE TYPE http_verb AS ENUM('GET', 'HEAD', 'OPTIONS', 'TRACE', 'PUT', 'DELETE', 'POST', 'PATCH', 'CONNECT'); CREATE TYPE protocol AS ENUM('HTTP/1.0', 'HTTP/1.1', 'HTTP/1.2', 'HTTP/1.3', 'HTTP/2.0', 'HTTPS/3.0'); -CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent'); +CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent', 'unparsable_line'); CREATE TABLE entities ( id bigint NOT NULL, diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index 1ac6167..722763a 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -3,6 +3,7 @@ #include <boost/test/unit_test.hpp> #include "test-util.hpp" +#include <selectcommandUtil.impl.h> #include <ingestor.hpp> #include <uaLookup.hpp> @@ -29,10 +30,31 @@ namespace std { { return std::apply( [&strm](auto &&... elems) -> decltype(auto) { - return ((strm << elems << '\n'), ...); + return ((strm << '{' << elems << ", "), ...) << '}'; }, values); } + + template<typename... T> + ostream & + operator<<(ostream & strm, const DB::Row<T...> & row) + { + return [&]<size_t... Field>(std::integer_sequence<size_t, Field...>) -> decltype(auto) { + return ((strm << '{' << row.template get<Field>() << ", "), ...) << '}'; + }(std::make_integer_sequence<size_t, sizeof...(T)>()); + } +} + +namespace DB { + template<typename... T> + bool + operator!=(const Row<T...> & row, const std::tuple<T...> & expected) + { + return [&]<size_t... Field>(std::integer_sequence<size_t, Field...>) { + return std::make_tuple(row.template get<Field>()...); + }(std::make_integer_sequence<size_t, sizeof...(T)>()) + != expected; + } } BOOST_DATA_TEST_CASE(QuotedStringsGood, @@ -222,6 +244,18 @@ BOOST_AUTO_TEST_CASE(FetchMockUserAgentDetail) } } +BOOST_AUTO_TEST_CASE(DiscardUnparsable) +{ + BOOST_REQUIRE_NO_THROW(ingestLogLine("does not parse")); + auto dbconn = dbpool->get(); + auto select = dbconn->select("SELECT id, value FROM entities WHERE type = 'unparsable_line'"); + constexpr std::array<std::tuple<uint64_t, std::string_view>, 1> EXPECTED {{ + {1664299262, "does not parse"}, + }}; + auto rows = select->as<uint64_t, std::string_view>(); + BOOST_CHECK_EQUAL_COLLECTIONS(rows.begin(), rows.end(), EXPECTED.begin(), EXPECTED.end()); +} + BOOST_AUTO_TEST_SUITE_END(); BOOST_AUTO_TEST_CASE(FetchRealUserAgentDetail, *boost::unit_test::disabled()) |