diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-08-23 15:28:56 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-08-25 16:01:06 +0100 |
commit | 10b19d747805e4fd1323455dae419091500efc18 (patch) | |
tree | fe4ad542f9d97b35842f4387db4a698d3a7aea35 | |
parent | b2416925f8845b70ed25fb4ec7cde8ef11e8c239 (diff) | |
download | webstat-10b19d747805e4fd1323455dae419091500efc18.tar.bz2 webstat-10b19d747805e4fd1323455dae419091500efc18.tar.xz webstat-10b19d747805e4fd1323455dae419091500efc18.zip |
Add helpers for hashing values extracted from log input
zlib's crc32 is used here; its interface is C-style, so calling it from C++
needs a few casts, but it works.
-rw-r--r-- | Jamroot.jam | 2 | ||||
-rw-r--r-- | src/Jamfile.jam | 1 | ||||
-rw-r--r-- | src/ingestor.cpp | 35 | ||||
-rw-r--r-- | src/ingestor.hpp | 1 | ||||
-rw-r--r-- | src/logTypes.hpp | 3 | ||||
-rw-r--r-- | test/test-ingest.cpp | 2 |
6 files changed, 43 insertions, 1 deletions
diff --git a/Jamroot.jam b/Jamroot.jam index c2f41d7..5894a7d 100644 --- a/Jamroot.jam +++ b/Jamroot.jam @@ -3,6 +3,8 @@ import testing ; build-project src ; build-project test ; +lib z : : <link>shared ; + project webstat : requirements <cxxstd>26 <visibility>hidden diff --git a/src/Jamfile.jam b/src/Jamfile.jam index 40ec28e..637ddb8 100644 --- a/src/Jamfile.jam +++ b/src/Jamfile.jam @@ -1,6 +1,7 @@ lib webstat : ingestor.cpp logTypes.cpp : <include>. <library>../thirdparty//scn + <library>..//z : : <include>. ; diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 5724b33..17310c2 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -1,8 +1,43 @@ #include "ingestor.hpp" #include <scn/scan.h> #include <syslog.h> +#include <utility> +#include <zlib.h> namespace WebStat { + namespace { + Crc32Value + crc32(const std::string_view value) + { + return static_cast<Crc32Value>(::crc32(::crc32(0, Z_NULL, 0), reinterpret_cast<const Bytef *>(value.data()), + static_cast<uInt>(value.length()))); + } + + Entity + addCrc32(const std::string_view value) + { + return {crc32(value), value}; + } + + std::optional<Entity> + addCrc32o(const std::optional<std::string_view> value) + { + return value.transform(addCrc32); + } + + auto + crc32ScanValues(const Ingestor::ScanValues & values) + { + return std::apply( + [](auto &&... 
value) { + return std::make_tuple(addCrc32(value...[0]), value...[1], value...[2], value...[3], + addCrc32(value...[4]), addCrc32o(value...[5]), value...[6], value...[7], value...[8], + value...[9], addCrc32o(value...[10]), addCrc32o(value...[11])); + }, + values); + } + } + Ingestor::ScanResult Ingestor::scanLogLine(std::string_view input) { diff --git a/src/ingestor.hpp b/src/ingestor.hpp index 97ce9f9..3bb9ddd 100644 --- a/src/ingestor.hpp +++ b/src/ingestor.hpp @@ -10,6 +10,7 @@ namespace WebStat { using ScanResult = decltype(scn::scan<std::string_view, std::string_view, uint64_t, std::string_view, QuotedString, QueryString, std::string_view, unsigned short, unsigned int, unsigned int, CLFString, CLFString>(std::declval<std::string_view>(), "")); + using ScanValues = std::remove_cvref_t<decltype(std::declval<WebStat::Ingestor::ScanResult>()->values())>; [[nodiscard]] static ScanResult scanLogLine(std::string_view); diff --git a/src/logTypes.hpp b/src/logTypes.hpp index d4f1b7b..7439733 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -21,6 +21,9 @@ namespace WebStat { using std::optional<std::string>::operator=; bool operator<=>(const CLFString &) const = default; }; + + using Crc32Value = uint32_t; + using Entity = std::pair<Crc32Value, std::string_view>; } namespace scn { diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp index f7eab19..a998dd3 100644 --- a/test/test-ingest.cpp +++ b/test/test-ingest.cpp @@ -145,7 +145,7 @@ BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("QueryStringsGood")) BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("CLFStringsGood")) BOOST_DATA_TEST_CASE(ExtractFields, - boost::unit_test::data::make<ParseData<ScanValues>>({ + boost::unit_test::data::make<ParseData<WebStat::Ingestor::ScanValues>>({ {R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 GET "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" 
"?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG", {"git.randomdan.homeip.net", "98.82.40.168", 1755561576768318, "GET", R"(/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest)", |