summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Goodliffe <dan@randomdan.homeip.net>2025-12-20 15:16:25 +0000
committerDan Goodliffe <dan@randomdan.homeip.net>2025-12-20 15:16:25 +0000
commit30a9d45f5322a63c2adf1de7a248ba3a9a0c3903 (patch)
treea384473d3be0d5f3fc8c358c827b6cf158d187b3 /src
parentf8276d22b6501e3d6164f9b26df94d2d57f597e5 (diff)
downloadwebstat-30a9d45f5322a63c2adf1de7a248ba3a9a0c3903.tar.bz2
webstat-30a9d45f5322a63c2adf1de7a248ba3a9a0c3903.tar.xz
webstat-30a9d45f5322a63c2adf1de7a248ba3a9a0c3903.zip
Add job for purging old access log entries from the databaseHEADmain
Diffstat (limited to 'src')
-rw-r--r--src/ingestor.cpp21
-rw-r--r--src/ingestor.hpp9
-rw-r--r--src/sql.cpp4
-rw-r--r--src/sql.hpp1
-rw-r--r--src/sql/accessLogPurgeOld.sql11
-rw-r--r--src/webstat_logger_main.cpp10
6 files changed, 56 insertions, 0 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index 0c9ce4e..44107ce 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -221,6 +221,7 @@ namespace WebStat {
}
};
runJobAsNeeded(&Ingestor::jobIngestParkedLines, lastRunIngestParkedLines, settings.freqIngestParkedLines);
+ runJobAsNeeded(&Ingestor::jobPurgeOldLogs, lastRunPurgeOldLogs, settings.freqPurgeOldLogs);
}
void
@@ -260,6 +261,26 @@ namespace WebStat {
std::filesystem::remove(path);
}
+ // Job: delete access log entries older than settings.purgeDaysToKeep days.
+ //
+ // Executes SQL::ACCESS_LOG_PURGE_OLD repeatedly, bound with a batch limit of
+ // settings.purgeDeleteMax and a retention interval of "<purgeDaysToKeep> days". It stops when a
+ // batch comes back smaller than the limit or when the settings.purgeDeleteMaxTime budget is
+ // used up, pausing settings.purgeDeletePause between batches.
+ //
+ // Returns the sum of the counts returned by each execute() call.
+ unsigned int
+ Ingestor::jobPurgeOldLogs()
+ {
+     // A zero batch size can never delete anything, and a zero-row result would not count as a
+     // short batch below, so the loop would otherwise idle until the time budget expired.
+     if (settings.purgeDeleteMax == 0) {
+         return 0;
+     }
+
+     // Measure the time budget on a monotonic clock so wall-clock adjustments cannot stretch or
+     // cut short the purge window.
+     using BudgetClock = std::chrono::steady_clock;
+
+     auto dbconn = dbpool->get();
+     const auto stopAt = BudgetClock::now() + settings.purgeDeleteMaxTime;
+     const auto purge = dbconn->modify(SQL::ACCESS_LOG_PURGE_OLD, SQL::ACCESS_LOG_PURGE_OLD_OPTS);
+     purge->bindParam(0, settings.purgeDeleteMax);
+     purge->bindParam(1, std::format("{} days", settings.purgeDaysToKeep));
+
+     unsigned int purgedTotal {};
+     while (stopAt > BudgetClock::now()) {
+         const auto purged = purge->execute();
+         purgedTotal += purged;
+         if (purged < settings.purgeDeleteMax) {
+             // Short batch: the statement found fewer matching rows than the limit, so stop.
+             break;
+         }
+         if (BudgetClock::now() + settings.purgeDeletePause >= stopAt) {
+             // The budget would be spent by the end of the pause; skip the pointless sleep.
+             break;
+         }
+         std::this_thread::sleep_for(settings.purgeDeletePause);
+     }
+     return purgedTotal;
+ }
+
template<typename... T>
Ingestor::NewEntities
Ingestor::newEntities(const std::tuple<T...> & values) const
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index 3e25938..a19c8ec 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -17,6 +17,7 @@ namespace WebStat {
using namespace std::chrono_literals;
struct IngestorSettings : Settings {
+ // NOLINTBEGIN(readability-magic-numbers)
std::string dbConnStr = "dbname=webstat user=webstat";
std::string userAgentAPI = "https://useragentstring.com";
std::filesystem::path fallbackDir = "/var/log/webstat";
@@ -24,6 +25,12 @@ namespace WebStat {
unsigned int dbKeep = 2;
int idleJobsAfter = duration_cast<milliseconds>(1min).count();
minutes freqIngestParkedLines = 30min;
+ minutes freqPurgeOldLogs = 6h;
+ unsigned int purgeDaysToKeep = 61; // ~2 months
+ unsigned int purgeDeleteMax = 10'000;
+ minutes purgeDeleteMaxTime = 5min;
+ seconds purgeDeletePause = 3s;
+ // NOLINTEND(readability-magic-numbers)
};
class Ingestor {
@@ -48,6 +55,7 @@ namespace WebStat {
void runJobsIdle();
void jobIngestParkedLines();
+ unsigned int jobPurgeOldLogs();
template<typename... T> void storeLogLine(DB::Connection *, const std::tuple<T...> &) const;
@@ -64,6 +72,7 @@ namespace WebStat {
using JobLastRunTime = std::chrono::system_clock::time_point;
JobLastRunTime lastRunIngestParkedLines;
+ JobLastRunTime lastRunPurgeOldLogs;
private:
static constexpr size_t MAX_NEW_ENTITIES = 6;
diff --git a/src/sql.cpp b/src/sql.cpp
index 9c0d992..da95f18 100644
--- a/src/sql.cpp
+++ b/src/sql.cpp
@@ -9,6 +9,9 @@ namespace WebStat::SQL {
const std::string ACCESS_LOG_INSERT {
#embed "sql/accessLogInsert.sql"
};
+ const std::string ACCESS_LOG_PURGE_OLD {
+#embed "sql/accessLogPurgeOld.sql"
+ };
const std::string ENTITY_INSERT {
#embed "sql/entityInsert.sql"
};
@@ -21,6 +24,7 @@ namespace WebStat::SQL {
#define HASH_OPTS(VAR) \
const DB::CommandOptionsPtr VAR##_OPTS = std::make_shared<DB::CommandOptions>(std::hash<std::string> {}(VAR))
HASH_OPTS(ACCESS_LOG_INSERT);
+ HASH_OPTS(ACCESS_LOG_PURGE_OLD);
HASH_OPTS(ENTITY_INSERT);
HASH_OPTS(ENTITY_UPDATE_DETAIL);
HASH_OPTS(HOST_UPSERT);
diff --git a/src/sql.hpp b/src/sql.hpp
index f0dfb05..1a12823 100644
--- a/src/sql.hpp
+++ b/src/sql.hpp
@@ -9,6 +9,7 @@ namespace WebStat::SQL {
extern const DB::CommandOptionsPtr Name##_OPTS
EMBED_DECLARE(ACCESS_LOG_INSERT);
+ EMBED_DECLARE(ACCESS_LOG_PURGE_OLD);
EMBED_DECLARE(ENTITY_INSERT);
EMBED_DECLARE(ENTITY_UPDATE_DETAIL);
EMBED_DECLARE(HOST_UPSERT);
diff --git a/src/sql/accessLogPurgeOld.sql b/src/sql/accessLogPurgeOld.sql
new file mode 100644
index 0000000..8379018
--- /dev/null
+++ b/src/sql/accessLogPurgeOld.sql
@@ -0,0 +1,11 @@
+WITH scope AS ( -- batch: the lowest-id rows, capped by the first bound parameter
+ SELECT id
+ FROM access_log
+ ORDER BY id
+ LIMIT ?
+), scoperange AS ( -- collapse the batch to a single id range
+ SELECT min(id) minid, max(id) maxid
+ FROM scope)
+DELETE FROM access_log USING scoperange
+WHERE request_time < CURRENT_DATE - ?::interval -- second bound parameter: retention period
+ AND access_log.id BETWEEN scoperange.minid AND scoperange.maxid -- only rows inside the batch
diff --git a/src/webstat_logger_main.cpp b/src/webstat_logger_main.cpp
index c859abf..7f4d9b4 100644
--- a/src/webstat_logger_main.cpp
+++ b/src/webstat_logger_main.cpp
@@ -59,6 +59,16 @@ main(int argc, char ** argv)
"Run idle when there's no activity for this period (ms)")
("job.parked.freq", po::value(&settings.freqIngestParkedLines)->default_value(settings.freqIngestParkedLines),
"How often to check for and import parked log lines")
+ // Settings for the job that purges old access log entries
+ ("job.purge.freq", po::value(&settings.freqPurgeOldLogs)->default_value(settings.freqPurgeOldLogs),
+ "How often to purge old access log entries from the database")
+ ("job.purge.days", po::value(&settings.purgeDaysToKeep)->default_value(settings.purgeDaysToKeep),
+ "How many days of access log entries to keep")
+ ("job.purge.max", po::value(&settings.purgeDeleteMax)->default_value(settings.purgeDeleteMax),
+ "Maximum number of access log entries to delete in a single operation")
+ ("job.purge.time", po::value(&settings.purgeDeleteMaxTime)->default_value(settings.purgeDeleteMaxTime),
+ "Maximum amount of time to spend purging old access log entries before continuing to ingest")
+ ("job.purge.pause", po::value(&settings.purgeDeletePause)->default_value(settings.purgeDeletePause),
+ "Time to pause for between repeated executions of a delete operation")
;
// clang-format on
po::variables_map optVars;