summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ingestor.cpp58
-rw-r--r--src/ingestor.hpp10
-rw-r--r--src/util.hpp2
-rw-r--r--test/test-ingest.cpp47
-rw-r--r--test/test-util.hpp2
5 files changed, 116 insertions, 3 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index cf49f52..f965d1d 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -142,7 +142,7 @@ namespace WebStat {
handleCurlOperations();
}
else if (!interesting) {
- // do idle job things
+ runJobsIdle();
}
}
while (!curlOperations.empty() && curl_multi_poll(curl.get(), nullptr, 0, INT_MAX, nullptr) == CURLM_OK) {
@@ -188,6 +188,62 @@ namespace WebStat {
linesParked++;
}
+ void
+ Ingestor::runJobsIdle()
+ {
+ const auto now = JobLastRunTime::clock::now();
+ auto runJobAsNeeded = [this, now](auto job, JobLastRunTime & lastRun, auto freq) {
+ try {
+ if (lastRun + freq < now) {
+ (this->*job)();
+ lastRun = now;
+ }
+ }
+ catch (const std::exception &) {
+ // Error, retry in half the frequency
+ lastRun = now - (freq / 2);
+ }
+ };
+ runJobAsNeeded(&Ingestor::jobIngestParkedLines, lastRunIngestParkedLines, settings.freqIngestParkedLines);
+ }
+
+ void
+ Ingestor::jobIngestParkedLines()
+ {
+ for (auto pathIter = std::filesystem::directory_iterator {settings.fallbackDir};
+ pathIter != std::filesystem::directory_iterator {}; ++pathIter) {
+ if (scn::scan<Crc32Value>(pathIter->path().filename().string(), "parked-{}.log")) {
+ jobIngestParkedLine(pathIter);
+ }
+ }
+ }
+
+ void
+ Ingestor::jobIngestParkedLine(const std::filesystem::directory_iterator & pathIter)
+ {
+ jobIngestParkedLine(pathIter->path(), pathIter->file_size());
+ }
+
+ void
+ Ingestor::jobIngestParkedLine(const std::filesystem::path & path, uintmax_t size)
+ {
+ if (std::ifstream parked {path}) {
+ std::string line;
+ line.resize_and_overwrite(size, [&parked](char * content, size_t size) {
+ parked.read(content, static_cast<std::streamsize>(size));
+ return static_cast<size_t>(parked.tellg());
+ });
+ if (line.length() < size) {
+ throw std::system_error {errno, std::generic_category(), "Short read of parked file"};
+ }
+ ingestLogLine(dbpool->get().get(), line);
+ }
+ else {
+ throw std::system_error {errno, std::generic_category(), strerror(errno)};
+ }
+ std::filesystem::remove(path);
+ }
+
template<typename... T>
Ingestor::NewEntities
Ingestor::newEntities(const std::tuple<T...> & values) const
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index afd4a0f..a20071e 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -23,6 +23,7 @@ namespace WebStat {
unsigned int dbMax = 4;
unsigned int dbKeep = 2;
int idleJobsAfter = duration_cast<milliseconds>(1min).count();
+ minutes freqIngestParkedLines = 30min;
};
class Ingestor {
@@ -44,6 +45,9 @@ namespace WebStat {
void ingestLogLine(std::string_view);
void ingestLogLine(DB::Connection *, std::string_view);
void parkLogLine(std::string_view);
+ void runJobsIdle();
+
+ void jobIngestParkedLines();
template<typename... T> void storeLogLine(DB::Connection *, const std::tuple<T...> &) const;
@@ -57,6 +61,9 @@ namespace WebStat {
size_t linesDiscarded = 0;
size_t linesParked = 0;
+ using JobLastRunTime = std::chrono::system_clock::time_point;
+ JobLastRunTime lastRunIngestParkedLines;
+
private:
static constexpr size_t MAX_NEW_ENTITIES = 6;
void storeEntities(DB::Connection *, std::span<const std::optional<Entity>>) const;
@@ -64,6 +71,9 @@ namespace WebStat {
template<typename... T> NewEntities newEntities(const std::tuple<T...> &) const;
void handleCurlOperations();
+ void jobIngestParkedLine(const std::filesystem::directory_iterator &);
+ void jobIngestParkedLine(const std::filesystem::path &, uintmax_t size);
+
using CurlOperations = std::map<CURL *, std::unique_ptr<CurlOperation>>;
mutable std::flat_set<Crc32Value> existingEntities;
uint32_t hostnameId;
diff --git a/src/util.hpp b/src/util.hpp
index d712d3b..ad55604 100644
--- a/src/util.hpp
+++ b/src/util.hpp
@@ -23,6 +23,8 @@ namespace WebStat {
values);
}
+ using FilePtr = std::unique_ptr<std::FILE, DeleteWith<&fclose>>;
+
template<typename... T>
void
bindMany(const DB::CommandPtr & cmd, unsigned int firstParam, T &&... param)
diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp
index 73a37c6..388c440 100644
--- a/test/test-ingest.cpp
+++ b/test/test-ingest.cpp
@@ -259,6 +259,53 @@ BOOST_AUTO_TEST_CASE(ParkLogLineOnError)
BOOST_CHECK_EQUAL(linesParked, 1);
}
+BOOST_AUTO_TEST_CASE(IngestParked, *boost::unit_test::depends_on("I/ParkLogLine"))
+{
+ parkLogLine(LOGLINE1);
+ BOOST_REQUIRE_EQUAL(linesParked, 1);
+ BOOST_REQUIRE_EQUAL(linesParsed, 0);
+ jobIngestParkedLines();
+ BOOST_CHECK_EQUAL(linesParsed, 1);
+ BOOST_CHECK_EQUAL(linesDiscarded, 0);
+ BOOST_CHECK(!std::filesystem::exists(settings.fallbackDir / "parked-3377916038.log"));
+}
+
+BOOST_AUTO_TEST_CASE(IngestParkedJob, *boost::unit_test::depends_on("I/IngestParked"))
+{
+ const auto now = JobLastRunTime::clock::now();
+ lastRunIngestParkedLines = now - 1s;
+ parkLogLine(LOGLINE1);
+
+ runJobsIdle();
+ BOOST_REQUIRE_EQUAL(linesParked, 1);
+ BOOST_REQUIRE_EQUAL(linesParsed, 0);
+ BOOST_CHECK_EQUAL(lastRunIngestParkedLines, now - 1s);
+
+ lastRunIngestParkedLines = now - settings.freqIngestParkedLines + 2s;
+ BOOST_REQUIRE_EQUAL(linesParked, 1);
+ BOOST_REQUIRE_EQUAL(linesParsed, 0);
+ BOOST_CHECK_EQUAL(lastRunIngestParkedLines, now - settings.freqIngestParkedLines + 2s);
+
+ lastRunIngestParkedLines = now - settings.freqIngestParkedLines - 1s;
+ runJobsIdle();
+ BOOST_CHECK_EQUAL(linesParsed, 1);
+ BOOST_CHECK_EQUAL(linesDiscarded, 0);
+ BOOST_CHECK_GE(lastRunIngestParkedLines, now);
+ BOOST_CHECK(!std::filesystem::exists(settings.fallbackDir / "parked-3377916038.log"));
+}
+
+BOOST_AUTO_TEST_CASE(JobErrorRescheduler, *boost::unit_test::depends_on("I/IngestParkedJob"))
+{
+ const auto now = JobLastRunTime::clock::now();
+ lastRunIngestParkedLines = now - settings.freqIngestParkedLines - 1s;
+ parkLogLine(LOGLINE1);
+ std::filesystem::permissions(settings.fallbackDir / "parked-3377916038.log", std::filesystem::perms::owner_write);
+ runJobsIdle();
+ BOOST_CHECK(std::filesystem::exists(settings.fallbackDir / "parked-3377916038.log"));
+ BOOST_CHECK_GE(lastRunIngestParkedLines, now - (settings.freqIngestParkedLines / 2) - 1s);
+ BOOST_CHECK_LE(lastRunIngestParkedLines, now - (settings.freqIngestParkedLines / 2) + 1s);
+}
+
BOOST_AUTO_TEST_CASE(FetchMockUserAgentDetail)
{
const auto uaDetailReq = WebStat::curlGetUserAgentDetail(0,
diff --git a/test/test-util.hpp b/test/test-util.hpp
index ac96ee6..f933cba 100644
--- a/test/test-util.hpp
+++ b/test/test-util.hpp
@@ -16,8 +16,6 @@ namespace WebStat {
#undef XSTR
#undef STR
- using FilePtr = std::unique_ptr<std::FILE, DeleteWith<&fclose>>;
-
struct MockDB : public DB::PluginMock<PQ::Mock> {
MockDB();
};