diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-10-06 20:43:52 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-10-06 20:43:52 +0100 |
commit | 63d6bf7395425be73fd6d9f5c89dc48817afffe6 (patch) | |
tree | 98a98671e7676202dcf24b1ef700a9240b999e78 | |
parent | 5aa9cca0d2ff25c541d7df3b63519c28eb75b656 (diff) | |
download | webstat-63d6bf7395425be73fd6d9f5c89dc48817afffe6.tar.bz2 webstat-63d6bf7395425be73fd6d9f5c89dc48817afffe6.tar.xz webstat-63d6bf7395425be73fd6d9f5c89dc48817afffe6.zip |
Add parked line import jobwebstat-0.2
Periodically, on idle, scan for and import previously parked lines.
-rw-r--r-- | src/ingestor.cpp | 58 | ||||
-rw-r--r-- | src/ingestor.hpp | 10 | ||||
-rw-r--r-- | src/util.hpp | 2 | ||||
-rw-r--r-- | test/test-ingest.cpp | 47 | ||||
-rw-r--r-- | test/test-util.hpp | 2 |
5 files changed, 116 insertions, 3 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp index cf49f52..f965d1d 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -142,7 +142,7 @@ namespace WebStat { handleCurlOperations(); } else if (!interesting) { - // do idle job things + runJobsIdle(); } } while (!curlOperations.empty() && curl_multi_poll(curl.get(), nullptr, 0, INT_MAX, nullptr) == CURLM_OK) { @@ -188,6 +188,62 @@ namespace WebStat { linesParked++; } + void + Ingestor::runJobsIdle() + { + const auto now = JobLastRunTime::clock::now(); + auto runJobAsNeeded = [this, now](auto job, JobLastRunTime & lastRun, auto freq) { + try { + if (lastRun + freq < now) { + (this->*job)(); + lastRun = now; + } + } + catch (const std::exception &) { + // Error, retry in half the frequency + lastRun = now - (freq / 2); + } + }; + runJobAsNeeded(&Ingestor::jobIngestParkedLines, lastRunIngestParkedLines, settings.freqIngestParkedLines); + } + + void + Ingestor::jobIngestParkedLines() + { + for (auto pathIter = std::filesystem::directory_iterator {settings.fallbackDir}; + pathIter != std::filesystem::directory_iterator {}; ++pathIter) { + if (scn::scan<Crc32Value>(pathIter->path().filename().string(), "parked-{}.log")) { + jobIngestParkedLine(pathIter); + } + } + } + + void + Ingestor::jobIngestParkedLine(const std::filesystem::directory_iterator & pathIter) + { + jobIngestParkedLine(pathIter->path(), pathIter->file_size()); + } + + void + Ingestor::jobIngestParkedLine(const std::filesystem::path & path, uintmax_t size) + { + if (std::ifstream parked {path}) { + std::string line; + line.resize_and_overwrite(size, [&parked](char * content, size_t size) { + parked.read(content, static_cast<std::streamsize>(size)); + return static_cast<size_t>(parked.tellg()); + }); + if (line.length() < size) { + throw std::system_error {errno, std::generic_category(), "Short read of parked file"}; + } + ingestLogLine(dbpool->get().get(), line); + } + else { + throw std::system_error {errno, std::generic_category(), strerror(errno)}; + } + std::filesystem::remove(path); + } + template<typename... T> Ingestor::NewEntities Ingestor::newEntities(const std::tuple<T...> & values) const diff --git a/src/ingestor.hpp b/src/ingestor.hpp index afd4a0f..a20071e 100644 --- a/src/ingestor.hpp +++ b/src/ingestor.hpp @@ -23,6 +23,7 @@ namespace WebStat { unsigned int dbMax = 4; unsigned int dbKeep = 2; int idleJobsAfter = duration_cast<milliseconds>(1min).count(); + minutes freqIngestParkedLines = 30min; }; class Ingestor { @@ -44,6 +45,9 @@ namespace WebStat { void ingestLogLine(std::string_view); void ingestLogLine(DB::Connection *, std::string_view); void parkLogLine(std::string_view); + void runJobsIdle(); + + void jobIngestParkedLines(); template<typename... T> void storeLogLine(DB::Connection *, const std::tuple<T...> &) const; @@ -57,6 +61,9 @@ namespace WebStat { size_t linesDiscarded = 0; size_t linesParked = 0; + using JobLastRunTime = std::chrono::system_clock::time_point; + JobLastRunTime lastRunIngestParkedLines; + private: static constexpr size_t MAX_NEW_ENTITIES = 6; void storeEntities(DB::Connection *, std::span<const std::optional<Entity>>) const; @@ -64,6 +71,9 @@ namespace WebStat { template<typename... T> NewEntities newEntities(const std::tuple<T...> &) const; void handleCurlOperations(); + void jobIngestParkedLine(const std::filesystem::directory_iterator &); + void jobIngestParkedLine(const std::filesystem::path &, uintmax_t size); + using CurlOperations = std::map<CURL *, std::unique_ptr<CurlOperation>>; mutable std::flat_set<Crc32Value> existingEntities; uint32_t hostnameId; diff --git a/src/util.hpp b/src/util.hpp index d712d3b..ad55604 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -23,6 +23,8 @@ namespace WebStat { values); } + using FilePtr = std::unique_ptr<std::FILE, DeleteWith<&fclose>>; + template<typename... T> void bindMany(const DB::CommandPtr & cmd, unsigned int firstParam, T &&... param) diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index 73a37c6..388c440 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -259,6 +259,53 @@ BOOST_AUTO_TEST_CASE(ParkLogLineOnError) BOOST_CHECK_EQUAL(linesParked, 1); } +BOOST_AUTO_TEST_CASE(IngestParked, *boost::unit_test::depends_on("I/ParkLogLine")) +{ + parkLogLine(LOGLINE1); + BOOST_REQUIRE_EQUAL(linesParked, 1); + BOOST_REQUIRE_EQUAL(linesParsed, 0); + jobIngestParkedLines(); + BOOST_CHECK_EQUAL(linesParsed, 1); + BOOST_CHECK_EQUAL(linesDiscarded, 0); + BOOST_CHECK(!std::filesystem::exists(settings.fallbackDir / "parked-3377916038.log")); +} + +BOOST_AUTO_TEST_CASE(IngestParkedJob, *boost::unit_test::depends_on("I/IngestParked")) +{ + const auto now = JobLastRunTime::clock::now(); + lastRunIngestParkedLines = now - 1s; + parkLogLine(LOGLINE1); + + runJobsIdle(); + BOOST_REQUIRE_EQUAL(linesParked, 1); + BOOST_REQUIRE_EQUAL(linesParsed, 0); + BOOST_CHECK_EQUAL(lastRunIngestParkedLines, now - 1s); + + lastRunIngestParkedLines = now - settings.freqIngestParkedLines + 2s; + BOOST_REQUIRE_EQUAL(linesParked, 1); + BOOST_REQUIRE_EQUAL(linesParsed, 0); + BOOST_CHECK_EQUAL(lastRunIngestParkedLines, now - settings.freqIngestParkedLines + 2s); + + lastRunIngestParkedLines = now - settings.freqIngestParkedLines - 1s; + runJobsIdle(); + BOOST_CHECK_EQUAL(linesParsed, 1); + BOOST_CHECK_EQUAL(linesDiscarded, 0); + BOOST_CHECK_GE(lastRunIngestParkedLines, now); + BOOST_CHECK(!std::filesystem::exists(settings.fallbackDir / "parked-3377916038.log")); +} + +BOOST_AUTO_TEST_CASE(JobErrorRescheduler, *boost::unit_test::depends_on("I/IngestParkedJob")) +{ + const auto now = JobLastRunTime::clock::now(); + lastRunIngestParkedLines = now - settings.freqIngestParkedLines - 1s; + parkLogLine(LOGLINE1); + std::filesystem::permissions(settings.fallbackDir / "parked-3377916038.log", std::filesystem::perms::owner_write); + runJobsIdle(); + BOOST_CHECK(std::filesystem::exists(settings.fallbackDir / "parked-3377916038.log")); + BOOST_CHECK_GE(lastRunIngestParkedLines, now - (settings.freqIngestParkedLines / 2) - 1s); + BOOST_CHECK_LE(lastRunIngestParkedLines, now - (settings.freqIngestParkedLines / 2) + 1s); +} + BOOST_AUTO_TEST_CASE(FetchMockUserAgentDetail) { const auto uaDetailReq = WebStat::curlGetUserAgentDetail(0, diff --git a/test/test-util.hpp b/test/test-util.hpp index ac96ee6..f933cba 100644 --- a/test/test-util.hpp +++ b/test/test-util.hpp @@ -16,8 +16,6 @@ namespace WebStat { #undef XSTR #undef STR - using FilePtr = std::unique_ptr<std::FILE, DeleteWith<&fclose>>; - struct MockDB : public DB::PluginMock<PQ::Mock> { MockDB(); }; |