From 3ce6cf305572709332d7329674ec45c987a093ad Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Sat, 11 Apr 2026 18:12:51 +0100 Subject: Introduce MD5 from libmd, use it for hashing queuedLines for park path --- Jamroot.jam | 1 + src/Jamfile.jam | 1 + src/ingestor.cpp | 30 ++++++++++++++++++++++++++++-- src/logTypes.hpp | 2 ++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Jamroot.jam b/Jamroot.jam index 2f38c94..37e4a0e 100644 --- a/Jamroot.jam +++ b/Jamroot.jam @@ -13,6 +13,7 @@ lib adhocutil : : shared : : /usr/include/adhocutil glib lib dbppcore : : shared : : /usr/include/dbpp adhocutil ; lib dbpp-postgresql : : shared : : /usr/include/dbpp-postgresql dbppcore ; lib z : : shared ; +lib md : : shared ; project webstat : requirements 26 diff --git a/src/Jamfile.jam b/src/Jamfile.jam index 9459dd8..eca0c24 100644 --- a/src/Jamfile.jam +++ b/src/Jamfile.jam @@ -4,6 +4,7 @@ lib webstat : [ glob *.cpp : *_main.cpp ] : ..//dbppcore ../thirdparty//scn ..//z + ..//md ..//curl : : . diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 0659f1f..5ae2487 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -24,6 +24,32 @@ namespace DB { namespace WebStat { namespace { + using ByteArrayView = std::span; + + auto + bytesToHexRange(const ByteArrayView bytes) + { + constexpr auto HEXN = 16ZU; + return bytes | std::views::transform([](auto byte) { + return std::array {byte / HEXN, byte % HEXN}; + }) | std::views::join + | std::views::transform([](auto nibble) { + return "0123456789abcdef"[nibble]; + }); + } + + EntityHash + makeHash(const std::string_view value) + { + MD5_CTX ctx {}; + MD5Init(&ctx); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - correct for md5ing raw bytes + MD5Update(&ctx, reinterpret_cast(value.data()), value.length()); + EntityHash hash {}; + MD5Final(hash.data(), &ctx); + return hash; + } + Crc32Value crc32(const std::string_view value) { @@ -281,8 +307,8 @@ namespace WebStat { if (queuedLines.empty()) { return std::unexpected(0); } - const std::filesystem::path path { - settings.fallbackDir / std::format("parked-{}.short", crc32(queuedLines.front()))}; + const std::filesystem::path path {settings.fallbackDir + / std::format("parked-{:s}.short", bytesToHexRange(makeHash(queuedLines.front())))}; if (auto parked = FilePtr(fopen(path.c_str(), "w"))) { fprintf(parked.get(), "%zu\n", queuedLines.size()); for (const auto & line : queuedLines) { diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 71393b2..6556d5c 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -35,6 +36,7 @@ namespace WebStat { }; using Crc32Value = uint32_t; + using EntityHash = std::array; using Entity = std::tuple; } -- cgit v1.3