diff options
| author | Dan Goodliffe <dan@randomdan.homeip.net> | 2026-01-17 19:40:47 +0000 |
|---|---|---|
| committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2026-01-17 20:33:37 +0000 |
| commit | 34051da2f27ffa40d0b6d20ae891a497fe73bfe5 (patch) | |
| tree | 3f54e8ef762d980fd159f5fcee8ccdfe4a1b5b07 | |
| parent | 9f1dfacb0f236da5f0413ac20e3376388df7d798 (diff) | |
| download | webstat-34051da2f27ffa40d0b6d20ae891a497fe73bfe5.tar.bz2 webstat-34051da2f27ffa40d0b6d20ae891a497fe73bfe5.tar.xz webstat-34051da2f27ffa40d0b6d20ae891a497fe73bfe5.zip | |
Attempt to save uninsertable log lines to the entities table
If that also fails, such as when the DB is unavailable, we still park
them as before. Lines which are saved as entities require investigation
into why they couldn't be inserted, much like UnparsableLines.
| -rw-r--r-- | src/ingestor.cpp | 28 | ||||
| -rw-r--r-- | src/logTypes.hpp | 1 | ||||
| -rw-r--r-- | src/schema.sql | 3 | ||||
| -rw-r--r-- | test/test-ingest.cpp | 4 |
4 files changed, 25 insertions, 11 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 44107ce..81642be 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -180,15 +180,26 @@ namespace WebStat { linesParsed++; const auto values = crc32ScanValues(result->values()); NewEntityIds ids; - { - std::optional<DB::TransactionScope> dbtx; - if (const auto newEnts = newEntities(values); newEnts.front()) { - dbtx.emplace(*dbconn); - ids = storeEntities(dbconn, newEnts); + try { + { + std::optional<DB::TransactionScope> dbtx; + if (const auto newEnts = newEntities(values); newEnts.front()) { + dbtx.emplace(*dbconn); + ids = storeEntities(dbconn, newEnts); + } + storeLogLine(dbconn, values); + } + rememberNewEntityIds(ids); + } + catch (const DB::Error & originalError) { + try { + const auto uninsertableLine = ToEntity<EntityType::UninsertableLine> {}(line); + rememberNewEntityIds(storeEntities(dbconn, {uninsertableLine})); + } + catch (const std::exception &) { + throw originalError; } - storeLogLine(dbconn, values); } - rememberNewEntityIds(ids); } else { linesDiscarded++; @@ -310,7 +321,7 @@ namespace WebStat { Ingestor::NewEntityIds Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const { - static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 7> + static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 8> ENTITY_TYPE_VALUES {{ {"host", nullptr}, {"virtual_host", nullptr}, @@ -319,6 +330,7 @@ namespace WebStat { {"referrer", nullptr}, {"user_agent", &Ingestor::onNewUserAgent}, {"unparsable_line", nullptr}, + {"uninsertable_line", nullptr}, }}; auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS); diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 0262060..f9395d1 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -30,6 +30,7 @@ namespace WebStat { Referrer, UserAgent, UnparsableLine, + UninsertableLine, }; using Crc32Value = uint32_t; 
diff --git a/src/schema.sql b/src/schema.sql index d04535e..7648b79 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -27,7 +27,8 @@ CREATE TYPE entity AS ENUM( 'query_string', 'referrer', 'user_agent', - 'unparsable_line' + 'unparsable_line', + 'uninsertable_line' ); CREATE TABLE entities( diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index 9b77567..efc7bc9 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -263,8 +263,8 @@ BOOST_AUTO_TEST_CASE(ParkLogLineOnError) constexpr std::string_view LOGLINE_BAD_VERB = R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 CAUSEPARK "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG"; BOOST_REQUIRE_NO_THROW(ingestLogLine(LOGLINE_BAD_VERB)); - BOOST_CHECK_EQUAL(linesParked, 1); - BOOST_CHECK(existingEntities.empty()); + BOOST_CHECK_EQUAL(linesParked, 0); + BOOST_CHECK_EQUAL(existingEntities.size(), 1); } BOOST_AUTO_TEST_CASE(IngestParked, *boost::unit_test::depends_on("I/ParkLogLine")) |
