summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Goodliffe <dan@randomdan.homeip.net>2025-08-23 15:28:56 +0100
committerDan Goodliffe <dan@randomdan.homeip.net>2025-08-25 16:01:06 +0100
commit10b19d747805e4fd1323455dae419091500efc18 (patch)
treefe4ad542f9d97b35842f4387db4a698d3a7aea35
parentb2416925f8845b70ed25fb4ec7cde8ef11e8c239 (diff)
downloadwebstat-10b19d747805e4fd1323455dae419091500efc18.tar.bz2
webstat-10b19d747805e4fd1323455dae419091500efc18.tar.xz
webstat-10b19d747805e4fd1323455dae419091500efc18.zip
Add helpers for hashing values extracted from log input
This uses zlib's crc32; its interface is C-style, so the calling code needs a few casts, but it works.
-rw-r--r--Jamroot.jam2
-rw-r--r--src/Jamfile.jam1
-rw-r--r--src/ingestor.cpp35
-rw-r--r--src/ingestor.hpp1
-rw-r--r--src/logTypes.hpp3
-rw-r--r--test/test-ingest.cpp2
6 files changed, 43 insertions, 1 deletions
diff --git a/Jamroot.jam b/Jamroot.jam
index c2f41d7..5894a7d 100644
--- a/Jamroot.jam
+++ b/Jamroot.jam
@@ -3,6 +3,8 @@ import testing ;
build-project src ;
build-project test ;
+lib z : : <link>shared ;
+
project webstat : requirements
<cxxstd>26
<visibility>hidden
diff --git a/src/Jamfile.jam b/src/Jamfile.jam
index 40ec28e..637ddb8 100644
--- a/src/Jamfile.jam
+++ b/src/Jamfile.jam
@@ -1,6 +1,7 @@
lib webstat : ingestor.cpp logTypes.cpp :
<include>.
<library>../thirdparty//scn
+ <library>..//z
: :
<include>.
;
diff --git a/src/ingestor.cpp b/src/ingestor.cpp
index 5724b33..17310c2 100644
--- a/src/ingestor.cpp
+++ b/src/ingestor.cpp
@@ -1,8 +1,43 @@
#include "ingestor.hpp"
#include <scn/scan.h>
#include <syslog.h>
+#include <utility>
+#include <zlib.h>
namespace WebStat {
+ namespace {
+ Crc32Value // CRC-32 of the bytes viewed by `value`, computed with zlib's ::crc32
+ crc32(const std::string_view value)
+ {
+ return static_cast<Crc32Value>(::crc32(::crc32(0, Z_NULL, 0), reinterpret_cast<const Bytef *>(value.data()), // inner call's result is passed as the starting CRC of the outer call
+ static_cast<uInt>(value.length()))); // NOTE(review): length is narrowed to zlib's uInt by this cast
+ }
+
+ Entity // pairs the CRC-32 of `value` with the view itself
+ addCrc32(const std::string_view value)
+ {
+ return {crc32(value), value}; // first: checksum, second: the same non-owning view that was passed in
+ }
+
+ std::optional<Entity> // optional-aware form of addCrc32
+ addCrc32o(const std::optional<std::string_view> value)
+ {
+ return value.transform(addCrc32); // empty stays empty; a contained view is mapped through addCrc32
+ }
+
+ auto // returns a new tuple built from the 12 elements of `values`, with checksums attached to some of them
+ crc32ScanValues(const Ingestor::ScanValues & values)
+ {
+ return std::apply( // expands `values` into the lambda's parameter pack
+ [](auto &&... value) {
+ return std::make_tuple(addCrc32(value...[0]), value...[1], value...[2], value...[3], // value...[N] selects the N-th pack element
+ addCrc32(value...[4]), addCrc32o(value...[5]), value...[6], value...[7], value...[8], // 0 and 4 go through addCrc32; 5 through addCrc32o
+ value...[9], addCrc32o(value...[10]), addCrc32o(value...[11])); // 10 and 11 go through addCrc32o; the rest are passed on as-is
+ },
+ values); // NOTE(review): the views inside the returned Entity values presumably refer to storage owned by `values` or its input - confirm lifetimes at the call site
+ }
+ }
+
Ingestor::ScanResult
Ingestor::scanLogLine(std::string_view input)
{
diff --git a/src/ingestor.hpp b/src/ingestor.hpp
index 97ce9f9..3bb9ddd 100644
--- a/src/ingestor.hpp
+++ b/src/ingestor.hpp
@@ -10,6 +10,7 @@ namespace WebStat {
using ScanResult = decltype(scn::scan<std::string_view, std::string_view, uint64_t, std::string_view,
QuotedString, QueryString, std::string_view, unsigned short, unsigned int, unsigned int, CLFString,
CLFString>(std::declval<std::string_view>(), ""));
+ using ScanValues = std::remove_cvref_t<decltype(std::declval<WebStat::Ingestor::ScanResult>()->values())>; // type yielded by values() on a ScanResult's pointee, with cv/ref qualifiers stripped
[[nodiscard]] static ScanResult scanLogLine(std::string_view);
diff --git a/src/logTypes.hpp b/src/logTypes.hpp
index d4f1b7b..7439733 100644
--- a/src/logTypes.hpp
+++ b/src/logTypes.hpp
@@ -21,6 +21,9 @@ namespace WebStat {
using std::optional<std::string>::operator=;
bool operator<=>(const CLFString &) const = default;
};
+
+ using Crc32Value = uint32_t; // 32-bit unsigned value used to hold a CRC-32
+ using Entity = std::pair<Crc32Value, std::string_view>; // first: Crc32Value, second: non-owning string view
}
namespace scn {
diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp
index f7eab19..a998dd3 100644
--- a/test/test-ingest.cpp
+++ b/test/test-ingest.cpp
@@ -145,7 +145,7 @@ BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("QueryStringsGood"))
BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("CLFStringsGood"))
BOOST_DATA_TEST_CASE(ExtractFields,
- boost::unit_test::data::make<ParseData<ScanValues>>({
+ boost::unit_test::data::make<ParseData<WebStat::Ingestor::ScanValues>>({
{R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 GET "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG",
{"git.randomdan.homeip.net", "98.82.40.168", 1755561576768318, "GET",
R"(/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest)",