From 5b2166496e5f3ff2c4276e0b5b28f109c70673d5 Mon Sep 17 00:00:00 2001
From: Dan Goodliffe
Date: Wed, 15 Apr 2026 12:03:21 +0100
Subject: Replace use of crc32 for entity id

Entity value is MD5 hashed same as DB unique key, but the id itself is now
taken from the DB primary key which is sequence generated.
---
 src/ingestor.hpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'src/ingestor.hpp')

diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index b2e0fed..195f325 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -8,7 +8,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -79,7 +79,7 @@ namespace WebStat {
 		DB::ConnectionPoolPtr dbpool;
 		mutable Stats stats {};
 
-		std::flat_set existingEntities;
+		std::flat_map existingEntities;
 		LineBatch queuedLines;
 		bool terminated = false;
 
@@ -100,11 +100,10 @@ namespace WebStat {
 		Job purgeOldLogs;
 
 	private:
-		static constexpr size_t MAX_NEW_ENTITIES = 6;
-		using NewEntityIds = std::array, MAX_NEW_ENTITIES>;
-		NewEntityIds storeEntities(DB::Connection *, std::span>) const;
-		using NewEntities = std::array, MAX_NEW_ENTITIES>;
-		template NewEntities newEntities(const std::tuple &) const;
+		template static std::vector entities(std::tuple &);
+		void fillKnownEntities(std::span) const;
+		void storeNewEntities(DB::Connection *, std::span) const;
+		void storeNewEntity(DB::Connection *, Entity &) const;
 		void onNewUserAgent(const Entity &) const;
 		void handleCurlOperations();
 		void logStats() const;
@@ -120,7 +119,7 @@ namespace WebStat {
 		[[gnu::format(printf, 3, 4)]] virtual void log(int level, const char * msgfmt, ...) const = 0;
 
 		using CurlOperations = std::map>;
-		uint32_t hostnameId;
+		EntityId hostnameId;
 		CurlMultiPtr curl;
 		mutable CurlOperations curlOperations;
 		std::thread::id mainThread;
-- 
cgit v1.3