summary | refs | log | tree | commit | diff
path: root/src/ingestor.hpp
diff options
context:
space:
mode:
authorDan Goodliffe <dan.goodliffe@octal.co.uk>2026-04-15 12:03:21 +0100
committerDan Goodliffe <dan.goodliffe@octal.co.uk>2026-04-15 12:05:20 +0100
commit5b2166496e5f3ff2c4276e0b5b28f109c70673d5 (patch)
tree93c351f6ef7d3d60d60247d5dbf531496f14a06c /src/ingestor.hpp
parent3ce6cf305572709332d7329674ec45c987a093ad (diff)
downloadwebstat-5b2166496e5f3ff2c4276e0b5b28f109c70673d5.tar.bz2
webstat-5b2166496e5f3ff2c4276e0b5b28f109c70673d5.tar.xz
webstat-5b2166496e5f3ff2c4276e0b5b28f109c70673d5.zip
Replace use of crc32 for entity id
The entity value is MD5-hashed, the same as the DB unique key, but the id itself is now taken from the DB primary key, which is sequence-generated.
Diffstat (limited to 'src/ingestor.hpp')
-rw-r--r--src/ingestor.hpp15
1 files changed, 7 insertions, 8 deletions
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index b2e0fed..195f325 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -8,7 +8,7 @@
#include <connection_fwd.h>
#include <cstdio>
#include <expected>
-#include <flat_set>
+#include <flat_map>
#include <future>
#include <scn/scan.h>
#include <span>
@@ -79,7 +79,7 @@ namespace WebStat {
DB::ConnectionPoolPtr dbpool;
mutable Stats stats {};
- std::flat_set<Crc32Value> existingEntities;
+ std::flat_map<EntityHash, EntityId> existingEntities;
LineBatch queuedLines;
bool terminated = false;
@@ -100,11 +100,10 @@ namespace WebStat {
Job purgeOldLogs;
private:
- static constexpr size_t MAX_NEW_ENTITIES = 6;
- using NewEntityIds = std::array<std::optional<Crc32Value>, MAX_NEW_ENTITIES>;
- NewEntityIds storeEntities(DB::Connection *, std::span<const std::optional<Entity>>) const;
- using NewEntities = std::array<std::optional<Entity>, MAX_NEW_ENTITIES>;
- template<typename... T> NewEntities newEntities(const std::tuple<T...> &) const;
+ template<typename... T> static std::vector<Entity *> entities(std::tuple<T...> &);
+ void fillKnownEntities(std::span<Entity *>) const;
+ void storeNewEntities(DB::Connection *, std::span<Entity *>) const;
+ void storeNewEntity(DB::Connection *, Entity &) const;
void onNewUserAgent(const Entity &) const;
void handleCurlOperations();
void logStats() const;
@@ -120,7 +119,7 @@ namespace WebStat {
[[gnu::format(printf, 3, 4)]] virtual void log(int level, const char * msgfmt, ...) const = 0;
using CurlOperations = std::map<CURL *, std::unique_ptr<CurlOperation>>;
- uint32_t hostnameId;
+ EntityId hostnameId;
CurlMultiPtr curl;
mutable CurlOperations curlOperations;
std::thread::id mainThread;