From 34051da2f27ffa40d0b6d20ae891a497fe73bfe5 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Sat, 17 Jan 2026 19:40:47 +0000 Subject: Attempt to save uninsertable log lines to the entities table If that fails, we still park them as before, such as when the DB is unavailable. Those which are saved as entities require investigation into why they couldn't be inserted as log lines, much like UnparsableLines. --- src/ingestor.cpp | 28 ++++++++++++++++++++-------- src/logTypes.hpp | 1 + src/schema.sql | 3 ++- test/test-ingest.cpp | 4 ++-- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 44107ce..81642be 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -180,15 +180,26 @@ namespace WebStat { linesParsed++; const auto values = crc32ScanValues(result->values()); NewEntityIds ids; - { - std::optional dbtx; - if (const auto newEnts = newEntities(values); newEnts.front()) { - dbtx.emplace(*dbconn); - ids = storeEntities(dbconn, newEnts); + try { + { + std::optional dbtx; + if (const auto newEnts = newEntities(values); newEnts.front()) { + dbtx.emplace(*dbconn); + ids = storeEntities(dbconn, newEnts); + } + storeLogLine(dbconn, values); + } + rememberNewEntityIds(ids); + } + catch (const DB::Error & originalError) { + try { + const auto uninsertableLine = ToEntity {}(line); + rememberNewEntityIds(storeEntities(dbconn, {uninsertableLine})); + } + catch (const std::exception &) { + throw originalError; } - storeLogLine(dbconn, values); } - rememberNewEntityIds(ids); } else { linesDiscarded++; @@ -310,7 +321,7 @@ namespace WebStat { Ingestor::NewEntityIds Ingestor::storeEntities(DB::Connection * dbconn, const std::span> values) const { - static constexpr std::array, 7> + static constexpr std::array, 8> ENTITY_TYPE_VALUES {{ {"host", nullptr}, {"virtual_host", nullptr}, @@ -319,6 +330,7 @@ namespace WebStat { {"referrer", nullptr}, {"user_agent", &Ingestor::onNewUserAgent}, {"unparsable_line", nullptr}, + {"uninsertable_line", nullptr}, 
}}; auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS); diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 0262060..f9395d1 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -30,6 +30,7 @@ namespace WebStat { Referrer, UserAgent, UnparsableLine, + UninsertableLine, }; using Crc32Value = uint32_t; diff --git a/src/schema.sql b/src/schema.sql index d04535e..7648b79 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -27,7 +27,8 @@ CREATE TYPE entity AS ENUM( 'query_string', 'referrer', 'user_agent', - 'unparsable_line' + 'unparsable_line', + 'uninsertable_line' ); CREATE TABLE entities( diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index 9b77567..efc7bc9 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -263,8 +263,8 @@ BOOST_AUTO_TEST_CASE(ParkLogLineOnError) constexpr std::string_view LOGLINE_BAD_VERB = R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 CAUSEPARK "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG"; BOOST_REQUIRE_NO_THROW(ingestLogLine(LOGLINE_BAD_VERB)); - BOOST_CHECK_EQUAL(linesParked, 1); - BOOST_CHECK(existingEntities.empty()); + BOOST_CHECK_EQUAL(linesParked, 0); + BOOST_CHECK_EQUAL(existingEntities.size(), 1); } BOOST_AUTO_TEST_CASE(IngestParked, *boost::unit_test::depends_on("I/ParkLogLine")) -- cgit v1.2.3