summaryrefslogtreecommitdiff
path: root/src/ingestor.cpp
diff options
context:
space:
mode:
author Dan Goodliffe <dan@randomdan.homeip.net> 2025-08-23 15:28:56 +0100
committer Dan Goodliffe <dan@randomdan.homeip.net> 2025-08-25 16:01:06 +0100
commit 10b19d747805e4fd1323455dae419091500efc18 (patch)
tree fe4ad542f9d97b35842f4387db4a698d3a7aea35 /src/ingestor.cpp
parent b2416925f8845b70ed25fb4ec7cde8ef11e8c239 (diff)
downloadwebstat-10b19d747805e4fd1323455dae419091500efc18.tar.bz2
webstat-10b19d747805e4fd1323455dae419091500efc18.tar.xz
webstat-10b19d747805e4fd1323455dae419091500efc18.zip
Add helpers for hashing values extracted from log input
zlib's crc32 is used here; its interface is C-style and therefore needs a few casts, but it works.
Diffstat (limited to 'src/ingestor.cpp')
-rw-r--r-- src/ingestor.cpp 35
1 files changed, 35 insertions, 0 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index 5724b33..17310c2 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -1,8 +1,43 @@
#include "ingestor.hpp"
#include <scn/scan.h>
#include <syslog.h>
+#include <utility>
+#include <zlib.h>
namespace WebStat {
+ namespace {
+ Crc32Value // CRC-32 of the bytes viewed by `value`, computed with zlib's ::crc32 and converted to Crc32Value.
+ crc32(const std::string_view value)
+ { // The inner ::crc32(0, Z_NULL, 0) call yields zlib's initial CRC value, which seeds the real checksum call.
+ return static_cast<Crc32Value>(::crc32(::crc32(0, Z_NULL, 0), reinterpret_cast<const Bytef *>(value.data()),
+ static_cast<uInt>(value.length()))); // zlib's C interface takes a Bytef pointer and a uInt length, hence the casts.
+ }
+
+ Entity // Builds an Entity from `value` and its CRC-32.
+ addCrc32(const std::string_view value)
+ {
+ return {crc32(value), value}; // Brace-initialised with the checksum first, then the original view.
+ }
+
+ std::optional<Entity> // Optional-aware counterpart of addCrc32.
+ addCrc32o(const std::optional<std::string_view> value)
+ {
+ return value.transform(addCrc32); // An empty optional stays empty; a held view is passed through addCrc32.
+ }
+
+ auto // Returns a new 12-element tuple made from `values`, with CRC-32 attached to selected fields.
+ crc32ScanValues(const Ingestor::ScanValues & values)
+ {
+ return std::apply( // std::apply unpacks the tuple-like `values` into the lambda's parameter pack.
+ [](auto &&... value) { // value...[N] below selects the N-th element of the pack (pack indexing).
+ return std::make_tuple(addCrc32(value...[0]), value...[1], value...[2], value...[3], // index 0 via addCrc32; 1-3 copied as-is.
+ addCrc32(value...[4]), addCrc32o(value...[5]), value...[6], value...[7], value...[8], // 4 via addCrc32, 5 via addCrc32o (presumably an optional field; confirm against ScanValues); 6-8 copied as-is.
+ value...[9], addCrc32o(value...[10]), addCrc32o(value...[11])); // 9 copied as-is; 10 and 11 via addCrc32o.
+ },
+ values);
+ }
+ }
+
Ingestor::ScanResult
Ingestor::scanLogLine(std::string_view input)
{