From 10b19d747805e4fd1323455dae419091500efc18 Mon Sep 17 00:00:00 2001
From: Dan Goodliffe
Date: Sat, 23 Aug 2025 15:28:56 +0100
Subject: Add helpers for hashing values extracted from log input

zlib's crc32 used here, the interface is a bit C and as a result a bit casty, but it'll work.
---
NOTE(review): this copy of the patch was recovered from a rendering that
collapsed it onto two lines and dropped every `<...>` token. Line breaks and
indentation below are reconstructed, so whitespace (and the context of the
src/ingestor.hpp hunk) may not match the repository byte for byte.
In the added lines the template arguments were re-derived from the visible code
and from zlib's crc32() prototype: static_cast<Crc32Value>,
reinterpret_cast<const Bytef *>, static_cast<uInt>, std::optional<Entity>,
std::optional<std::string_view>, std::declval<ScanResult> and
std::pair<Crc32Value, std::string_view>. Confirm them against the repository.
Tokens that cannot be derived are left exactly as found: the author address,
the Jam feature prefixes, every #include target (one of the two added includes
is presumably <zlib.h>), and the template arguments in unchanged context lines.
Fetch the commit from the repository rather than applying this copy.

 src/Jamfile.jam  |  1 +
 src/ingestor.cpp | 35 +++++++++++++++++++++++++++++++++++
 src/ingestor.hpp |  1 +
 src/logTypes.hpp |  3 +++
 4 files changed, 40 insertions(+)

(limited to 'src')

diff --git a/src/Jamfile.jam b/src/Jamfile.jam
index 40ec28e..637ddb8 100644
--- a/src/Jamfile.jam
+++ b/src/Jamfile.jam
@@ -1,6 +1,7 @@
 lib webstat : ingestor.cpp logTypes.cpp :
 	.
 	../thirdparty//scn
+	..//z
 	: :
 	.
 	;
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index 5724b33..17310c2 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -1,8 +1,43 @@
 #include "ingestor.hpp"
 #include
 #include
+#include
+#include
 
 namespace WebStat {
+	namespace {
+		Crc32Value
+		crc32(const std::string_view value)
+		{
+			return static_cast<Crc32Value>(::crc32(::crc32(0, Z_NULL, 0), reinterpret_cast<const Bytef *>(value.data()),
+					static_cast<uInt>(value.length())));
+		}
+
+		Entity
+		addCrc32(const std::string_view value)
+		{
+			return {crc32(value), value};
+		}
+
+		std::optional<Entity>
+		addCrc32o(const std::optional<std::string_view> value)
+		{
+			return value.transform(addCrc32);
+		}
+
+		auto
+		crc32ScanValues(const Ingestor::ScanValues & values)
+		{
+			return std::apply(
+					[](auto &&... value) {
+						return std::make_tuple(addCrc32(value...[0]), value...[1], value...[2], value...[3],
+								addCrc32(value...[4]), addCrc32o(value...[5]), value...[6], value...[7], value...[8],
+								value...[9], addCrc32o(value...[10]), addCrc32o(value...[11]));
+					},
+					values);
+		}
+	}
+
 	Ingestor::ScanResult
 	Ingestor::scanLogLine(std::string_view input)
 	{
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index 97ce9f9..3bb9ddd 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -10,6 +10,7 @@ namespace WebStat {
 		using ScanResult = decltype(scn::scan(std::declval(), ""));
+		using ScanValues = std::remove_cvref_t<decltype(std::declval<ScanResult>()->values())>;
 
 		[[nodiscard]] static ScanResult scanLogLine(std::string_view);
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index d4f1b7b..7439733 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -21,6 +21,9 @@ namespace WebStat {
 		using std::optional::operator=;
 		bool operator<=>(const CLFString &) const = default;
 	};
+
+	using Crc32Value = uint32_t;
+	using Entity = std::pair<Crc32Value, std::string_view>;
 }
 
 namespace scn {
-- 
cgit v1.2.3