summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Goodliffe <dan@randomdan.homeip.net>2026-01-17 19:40:47 +0000
committerDan Goodliffe <dan@randomdan.homeip.net>2026-01-17 20:33:37 +0000
commit34051da2f27ffa40d0b6d20ae891a497fe73bfe5 (patch)
tree3f54e8ef762d980fd159f5fcee8ccdfe4a1b5b07
parent9f1dfacb0f236da5f0413ac20e3376388df7d798 (diff)
downloadwebstat-34051da2f27ffa40d0b6d20ae891a497fe73bfe5.tar.bz2
webstat-34051da2f27ffa40d0b6d20ae891a497fe73bfe5.tar.xz
webstat-34051da2f27ffa40d0b6d20ae891a497fe73bfe5.zip
Attempt to save uninsertable log lines to the entities table
If that also fails (for example, when the DB is unavailable), we still park them as before. Lines that are saved as entities need investigating to find out why they could not be inserted, much like UnparsableLines.
-rw-r--r--src/ingestor.cpp28
-rw-r--r--src/logTypes.hpp1
-rw-r--r--src/schema.sql3
-rw-r--r--test/test-ingest.cpp4
4 files changed, 25 insertions, 11 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index 44107ce..81642be 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -180,15 +180,26 @@ namespace WebStat {
linesParsed++;
const auto values = crc32ScanValues(result->values());
NewEntityIds ids;
- {
- std::optional<DB::TransactionScope> dbtx;
- if (const auto newEnts = newEntities(values); newEnts.front()) {
- dbtx.emplace(*dbconn);
- ids = storeEntities(dbconn, newEnts);
+ try {
+ {
+ std::optional<DB::TransactionScope> dbtx;
+ if (const auto newEnts = newEntities(values); newEnts.front()) {
+ dbtx.emplace(*dbconn);
+ ids = storeEntities(dbconn, newEnts);
+ }
+ storeLogLine(dbconn, values);
+ }
+ rememberNewEntityIds(ids);
+ }
+ catch (const DB::Error & originalError) {
+ try {
+ const auto uninsertableLine = ToEntity<EntityType::UninsertableLine> {}(line);
+ rememberNewEntityIds(storeEntities(dbconn, {uninsertableLine}));
+ }
+ catch (const std::exception &) {
+ throw originalError;
}
- storeLogLine(dbconn, values);
}
- rememberNewEntityIds(ids);
}
else {
linesDiscarded++;
@@ -310,7 +321,7 @@ namespace WebStat {
Ingestor::NewEntityIds
Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const
{
- static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 7>
+ static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 8>
ENTITY_TYPE_VALUES {{
{"host", nullptr},
{"virtual_host", nullptr},
@@ -319,6 +330,7 @@ namespace WebStat {
{"referrer", nullptr},
{"user_agent", &Ingestor::onNewUserAgent},
{"unparsable_line", nullptr},
+ {"uninsertable_line", nullptr},
}};
auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS);
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index 0262060..f9395d1 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -30,6 +30,7 @@ namespace WebStat {
Referrer,
UserAgent,
UnparsableLine,
+ UninsertableLine,
};
using Crc32Value = uint32_t;
diff --git a/src/schema.sql b/src/schema.sql
index d04535e..7648b79 100644
--- a/src/schema.sql
+++ b/src/schema.sql
@@ -27,7 +27,8 @@ CREATE TYPE entity AS ENUM(
'query_string',
'referrer',
'user_agent',
- 'unparsable_line'
+ 'unparsable_line',
+ 'uninsertable_line'
);
CREATE TABLE entities(
diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp
index 9b77567..efc7bc9 100644
--- a/test/test-ingest.cpp
+++ b/test/test-ingest.cpp
@@ -263,8 +263,8 @@ BOOST_AUTO_TEST_CASE(ParkLogLineOnError)
constexpr std::string_view LOGLINE_BAD_VERB
= R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 CAUSEPARK "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG";
BOOST_REQUIRE_NO_THROW(ingestLogLine(LOGLINE_BAD_VERB));
- BOOST_CHECK_EQUAL(linesParked, 1);
- BOOST_CHECK(existingEntities.empty());
+ BOOST_CHECK_EQUAL(linesParked, 0);
+ BOOST_CHECK_EQUAL(existingEntities.size(), 1);
}
BOOST_AUTO_TEST_CASE(IngestParked, *boost::unit_test::depends_on("I/ParkLogLine"))