diff options
| -rw-r--r-- | src/ingestor.cpp | 28 | ||||
| -rw-r--r-- | src/logTypes.hpp | 1 | ||||
| -rw-r--r-- | src/schema.sql | 3 | ||||
| -rw-r--r-- | test/test-ingest.cpp | 4 |
4 files changed, 25 insertions, 11 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 44107ce..81642be 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -180,15 +180,26 @@ namespace WebStat { linesParsed++; const auto values = crc32ScanValues(result->values()); NewEntityIds ids; - { - std::optional<DB::TransactionScope> dbtx; - if (const auto newEnts = newEntities(values); newEnts.front()) { - dbtx.emplace(*dbconn); - ids = storeEntities(dbconn, newEnts); + try { + { + std::optional<DB::TransactionScope> dbtx; + if (const auto newEnts = newEntities(values); newEnts.front()) { + dbtx.emplace(*dbconn); + ids = storeEntities(dbconn, newEnts); + } + storeLogLine(dbconn, values); + } + rememberNewEntityIds(ids); + } + catch (const DB::Error & originalError) { + try { + const auto uninsertableLine = ToEntity<EntityType::UninsertableLine> {}(line); + rememberNewEntityIds(storeEntities(dbconn, {uninsertableLine})); + } + catch (const std::exception &) { + throw originalError; } - storeLogLine(dbconn, values); } - rememberNewEntityIds(ids); } else { linesDiscarded++; @@ -310,7 +321,7 @@ namespace WebStat { Ingestor::NewEntityIds Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const { - static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 7> + static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 8> ENTITY_TYPE_VALUES {{ {"host", nullptr}, {"virtual_host", nullptr}, @@ -319,6 +330,7 @@ namespace WebStat { {"referrer", nullptr}, {"user_agent", &Ingestor::onNewUserAgent}, {"unparsable_line", nullptr}, + {"uninsertable_line", nullptr}, }}; auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS); diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 0262060..f9395d1 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -30,6 +30,7 @@ namespace WebStat { Referrer, UserAgent, UnparsableLine, + UninsertableLine, }; using Crc32Value = uint32_t; diff --git a/src/schema.sql b/src/schema.sql index d04535e..7648b79 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -27,7 +27,8 @@ CREATE TYPE entity AS ENUM( 'query_string', 'referrer', 'user_agent', - 'unparsable_line' + 'unparsable_line', + 'uninsertable_line' ); CREATE TABLE entities( diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index 9b77567..efc7bc9 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -263,8 +263,8 @@ BOOST_AUTO_TEST_CASE(ParkLogLineOnError) constexpr std::string_view LOGLINE_BAD_VERB = R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 CAUSEPARK "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG"; BOOST_REQUIRE_NO_THROW(ingestLogLine(LOGLINE_BAD_VERB)); - BOOST_CHECK_EQUAL(linesParked, 1); - BOOST_CHECK(existingEntities.empty()); + BOOST_CHECK_EQUAL(linesParked, 0); + BOOST_CHECK_EQUAL(existingEntities.size(), 1); } BOOST_AUTO_TEST_CASE(IngestParked, *boost::unit_test::depends_on("I/ParkLogLine")) |
