From 10b19d747805e4fd1323455dae419091500efc18 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Sat, 23 Aug 2025 15:28:56 +0100 Subject: Add helpers for hashing values extracted from log input zlib's crc32 used here, the interface is a bit C and as a result a bit casty, but it'll work. --- Jamroot.jam | 2 ++ src/Jamfile.jam | 1 + src/ingestor.cpp | 35 +++++++++++++++++++++++++++++++++++ src/ingestor.hpp | 1 + src/logTypes.hpp | 3 +++ test/test-ingest.cpp | 2 +- 6 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Jamroot.jam b/Jamroot.jam index c2f41d7..5894a7d 100644 --- a/Jamroot.jam +++ b/Jamroot.jam @@ -3,6 +3,8 @@ import testing ; build-project src ; build-project test ; +lib z : : shared ; + project webstat : requirements 26 hidden diff --git a/src/Jamfile.jam b/src/Jamfile.jam index 40ec28e..637ddb8 100644 --- a/src/Jamfile.jam +++ b/src/Jamfile.jam @@ -1,6 +1,7 @@ lib webstat : ingestor.cpp logTypes.cpp : . ../thirdparty//scn + ..//z : : . ; diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 5724b33..17310c2 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -1,8 +1,43 @@ #include "ingestor.hpp" #include #include +#include +#include namespace WebStat { + namespace { + Crc32Value + crc32(const std::string_view value) + { + return static_cast(::crc32(::crc32(0, Z_NULL, 0), reinterpret_cast(value.data()), + static_cast(value.length()))); + } + + Entity + addCrc32(const std::string_view value) + { + return {crc32(value), value}; + } + + std::optional + addCrc32o(const std::optional value) + { + return value.transform(addCrc32); + } + + auto + crc32ScanValues(const Ingestor::ScanValues & values) + { + return std::apply( + [](auto &&... value) { + return std::make_tuple(addCrc32(value...[0]), value...[1], value...[2], value...[3], + addCrc32(value...[4]), addCrc32o(value...[5]), value...[6], value...[7], value...[8], + value...[9], addCrc32o(value...[10]), addCrc32o(value...[11])); + }, + values); + } + } + Ingestor::ScanResult Ingestor::scanLogLine(std::string_view input) { diff --git a/src/ingestor.hpp b/src/ingestor.hpp index 97ce9f9..3bb9ddd 100644 --- a/src/ingestor.hpp +++ b/src/ingestor.hpp @@ -10,6 +10,7 @@ namespace WebStat { using ScanResult = decltype(scn::scan(std::declval(), "")); + using ScanValues = std::remove_cvref_t()->values())>; [[nodiscard]] static ScanResult scanLogLine(std::string_view); diff --git a/src/logTypes.hpp b/src/logTypes.hpp index d4f1b7b..7439733 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -21,6 +21,9 @@ namespace WebStat { using std::optional::operator=; bool operator<=>(const CLFString &) const = default; }; + + using Crc32Value = uint32_t; + using Entity = std::pair; } namespace scn { diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index f7eab19..a998dd3 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -145,7 +145,7 @@ BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("QueryStringsGood")) BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("CLFStringsGood")) BOOST_DATA_TEST_CASE(ExtractFields, - boost::unit_test::data::make>({ + boost::unit_test::data::make>({ {R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 GET "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG", {"git.randomdan.homeip.net", "98.82.40.168", 1755561576768318, "GET", R"(/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest)", -- cgit v1.2.3