summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/ingestor.cpp | 13
-rw-r--r-- src/ingestor.hpp | 1
-rw-r--r-- src/logTypes.hpp | 1
-rw-r--r-- src/schema.sql | 2
-rw-r--r-- test/test-ingest.cpp | 36
5 files changed, 48 insertions, 5 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index e1cc7c6..db5f317 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -121,7 +121,7 @@ namespace WebStat {
if (logIn.revents) {
if (auto line = scn::scan<std::string>(input, "{:[^\n]}\n")) {
linesRead++;
- ingestLogLine(dbpool->get().get(), line->value());
+ ingestLogLine(line->value());
}
else {
break;
@@ -137,6 +137,12 @@ namespace WebStat {
}
void
+    Ingestor::ingestLogLine(const std::string_view line)
+    {
+        // Convenience overload: obtain a connection handle from dbpool and delegate to the
+        // (DB::Connection *, line) overload. The handle is kept in a named local so it
+        // stays in scope for as long as the raw pointer handed to the callee is in use.
+        auto pooled = dbpool->get();
+        ingestLogLine(pooled.get(), line);
+    }
+
+ void
Ingestor::ingestLogLine(DB::Connection * dbconn, const std::string_view line)
{
if (auto result = scanLogLine(line)) {
@@ -150,8 +156,9 @@ namespace WebStat {
storeLogLine(dbconn, values);
}
else {
- syslog(LOG_WARNING, "Discarded line: [%.*s]", static_cast<int>(line.length()), line.data());
linesDiscarded++;
+ const auto unparsableLine = toEntity(line, EntityType::UnparsableLine);
+ storeEntities(dbconn, {unparsableLine});
}
}
@@ -185,7 +192,7 @@ namespace WebStat {
Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const
{
static constexpr std::array ENTITY_TYPE_VALUES {
- "host", "virtual_host", "path", "query_string", "referrer", "user_agent"};
+ "host", "virtual_host", "path", "query_string", "referrer", "user_agent", "unparsable_line"};
auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS);
std::ranges::for_each(
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index ffdcc9a..879526d 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -27,6 +27,7 @@ namespace WebStat {
[[nodiscard]] static ScanResult scanLogLine(std::string_view);
void ingestLog(std::FILE *);
+ /// Ingest a single log line; the definition fetches a connection from dbpool and
+ /// forwards to the (DB::Connection *, std::string_view) overload below.
+ void ingestLogLine(std::string_view);
void ingestLogLine(DB::Connection *, std::string_view);
template<typename... T> void storeLogLine(DB::Connection *, const std::tuple<T...> &) const;
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index 687f43b..7a78cc1 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -29,6 +29,7 @@ namespace WebStat {
QueryString,
Referrer,
UserAgent,
+ UnparsableLine,
};
using Crc32Value = uint32_t;
diff --git a/src/schema.sql b/src/schema.sql
index 3c6285b..4f3b205 100644
--- a/src/schema.sql
+++ b/src/schema.sql
@@ -1,6 +1,6 @@
CREATE TYPE http_verb AS ENUM('GET', 'HEAD', 'OPTIONS', 'TRACE', 'PUT', 'DELETE', 'POST', 'PATCH', 'CONNECT');
CREATE TYPE protocol AS ENUM('HTTP/1.0', 'HTTP/1.1', 'HTTP/1.2', 'HTTP/1.3', 'HTTP/2.0', 'HTTPS/3.0');
-CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent');
+CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent', 'unparsable_line');
CREATE TABLE entities (
id bigint NOT NULL,
diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp
index 1ac6167..722763a 100644
--- a/test/test-ingest.cpp
+++ b/test/test-ingest.cpp
@@ -3,6 +3,7 @@
#include <boost/test/unit_test.hpp>
#include "test-util.hpp"
+#include <selectcommandUtil.impl.h>
#include <ingestor.hpp>
#include <uaLookup.hpp>
@@ -29,10 +30,31 @@ namespace std {
{
return std::apply(
[&strm](auto &&... elems) -> decltype(auto) {
- return ((strm << elems << '\n'), ...);
+ return ((strm << '{' << elems << ", "), ...) << '}';
},
values);
}
+
+    /// Streams a DB::Row as "{field0, field1, ...}", e.g. for the diagnostics printed by
+    /// collection-comparison assertions in this test file.
+    ///
+    /// @param strm  destination stream
+    /// @param row   row whose fields are read with row.get<I>() for I in [0, sizeof...(T))
+    /// @return      strm, to allow chaining
+    template<typename... T>
+    ostream &
+    operator<<(ostream & strm, const DB::Row<T...> & row)
+    {
+        // The opening brace is written once, outside the fold. Folding it in with each
+        // field produced unbalanced output such as "{1, {x, }".
+        strm << '{';
+        [&]<size_t... Field>(std::index_sequence<Field...>) {
+            // The separator precedes every field except the first, so nothing trails the last.
+            ((strm << (Field == 0 ? "" : ", ") << row.template get<Field>()), ...);
+        }(std::index_sequence_for<T...>());
+        return strm << '}';
+    }
+}
+
+namespace DB {
+    /// Inequality between a fetched row and an expected tuple.
+    ///
+    /// The row's fields are copied, in order, into a std::tuple via row.get<I>(), and that
+    /// tuple is compared against `expected` with std::tuple's own operator!=.
+    template<typename... T>
+    bool
+    operator!=(const Row<T...> & row, const std::tuple<T...> & expected)
+    {
+        const auto rowAsTuple = [&row]<size_t... Field>(std::index_sequence<Field...>) {
+            return std::make_tuple(row.template get<Field>()...);
+        }(std::index_sequence_for<T...>());
+        return rowAsTuple != expected;
+    }
}
BOOST_DATA_TEST_CASE(QuotedStringsGood,
@@ -222,6 +244,18 @@ BOOST_AUTO_TEST_CASE(FetchMockUserAgentDetail)
}
}
+// Feeds a line that the log parser cannot scan and checks that it ends up as exactly one
+// row in `entities` with type 'unparsable_line' and the original text as its value.
+BOOST_AUTO_TEST_CASE(DiscardUnparsable)
+{
+ // Ingesting an unparsable line must not throw.
+ BOOST_REQUIRE_NO_THROW(ingestLogLine("does not parse"));
+ auto dbconn = dbpool->get();
+ // Read back everything recorded under the 'unparsable_line' entity type.
+ auto select = dbconn->select("SELECT id, value FROM entities WHERE type = 'unparsable_line'");
+ constexpr std::array<std::tuple<uint64_t, std::string_view>, 1> EXPECTED {{
+ // NOTE(review): the id is presumably the CRC32 of the line text - confirm against toEntity.
+ {1664299262, "does not parse"},
+ }};
+ // `select` is kept as a named local while `rows` is iterated below.
+ auto rows = select->as<uint64_t, std::string_view>();
+ BOOST_CHECK_EQUAL_COLLECTIONS(rows.begin(), rows.end(), EXPECTED.begin(), EXPECTED.end());
+}
+
BOOST_AUTO_TEST_SUITE_END();
BOOST_AUTO_TEST_CASE(FetchRealUserAgentDetail, *boost::unit_test::disabled())