diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-09-01 19:35:57 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2025-09-01 20:03:42 +0100 |
commit | 53e0f24faade5b543f46e7d89544d92f3709bf7a (patch) | |
tree | a20adc0c8357ed9a6346f8609a0dcfbbd9dd5d5e | |
parent | 53a42351b97db6c84fb930b9eb9c89d694956bca (diff) | |
download | webstat-53e0f24faade5b543f46e7d89544d92f3709bf7a.tar.bz2 webstat-53e0f24faade5b543f46e7d89544d92f3709bf7a.tar.xz webstat-53e0f24faade5b543f46e7d89544d92f3709bf7a.zip |
Tag entities with their type (tag: webstat-0.1.1)
-rw-r--r-- | src/ingestor.cpp | 34 | ||||
-rw-r--r-- | src/logTypes.hpp | 11 | ||||
-rw-r--r-- | src/schema.sql | 2 | ||||
-rw-r--r-- | src/sql/entityInsert.sql | 2 |
4 files changed, 34 insertions, 15 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 00bad64..ea62d54 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -15,7 +15,7 @@ namespace DB { void DB::Command::bindParam(unsigned int idx, const WebStat::Entity & entity) { - bindParamI(idx, entity.first); + bindParamI(idx, std::get<0>(entity)); } } @@ -29,15 +29,17 @@ namespace WebStat { } Entity - addCrc32(const std::string_view value) + toEntity(const std::string_view value, const EntityType type) { - return {crc32(value), value}; + return {crc32(value), type, value}; } std::optional<Entity> - addCrc32o(const std::optional<std::string_view> value) + toEntityo(const std::optional<std::string_view> value, const EntityType type) { - return value.transform(addCrc32); + return value.transform([type](auto && value) { + return toEntity(value, type); + }); } auto @@ -45,9 +47,11 @@ namespace WebStat { { return std::apply( [](auto &&... value) { - return std::make_tuple(addCrc32(value...[0]), value...[1], value...[2], value...[3], - addCrc32(value...[4]), addCrc32o(value...[5]), value...[6], value...[7], value...[8], - value...[9], addCrc32o(value...[10]), addCrc32o(value...[11])); + return std::make_tuple(toEntity(value...[0], EntityType::VirtualHost), value...[1], value...[2], + value...[3], toEntity(value...[4], EntityType::Path), + toEntityo(value...[5], EntityType::QueryString), value...[6], value...[7], value...[8], + value...[9], toEntityo(value...[10], EntityType::Referrer), + toEntityo(value...[11], EntityType::UserAgent)); }, values); } @@ -58,7 +62,7 @@ namespace WebStat { { storeEntities(dbpool->get().get(), { - std::make_pair(hostnameId, hostname), + std::make_tuple(hostnameId, EntityType::Host, hostname), }); } @@ -118,7 +122,7 @@ namespace WebStat { visit( [this, &next]<typename X>(const X & entity) { auto addNewIfReqd = [&next, this](auto && entity) mutable { - if (!existingEntities.contains(entity.first)) { + if (!existingEntities.contains(std::get<0>(entity))) { *next++ = entity; } return 
0; @@ -138,13 +142,17 @@ namespace WebStat { void Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const { + static constexpr std::array ENTITY_TYPE_VALUES { + "host", "virtual_host", "path", "query_string", "referrer", "user_agent"}; + auto insert = dbconn->modify(SQL::ENTITY_INSERT, SQL::ENTITY_INSERT_OPTS); std::ranges::for_each(values | std::views::take_while(&std::optional<Entity>::has_value), [this, insert = insert.get()](auto && entity) { - insert->bindParamI(0, entity->first); - insert->bindParamS(1, entity->second); + insert->bindParamI(0, std::get<0>(*entity)); + insert->bindParamS(1, ENTITY_TYPE_VALUES[std::to_underlying(std::get<1>(*entity))]); + insert->bindParamS(2, std::get<2>(*entity)); insert->execute(); - existingEntities.emplace(entity->first); + existingEntities.emplace(std::get<0>(*entity)); }); } diff --git a/src/logTypes.hpp b/src/logTypes.hpp index 7439733..687f43b 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -22,8 +22,17 @@ namespace WebStat { bool operator<=>(const CLFString &) const = default; }; + enum class EntityType : std::uint8_t { + Host, + VirtualHost, + Path, + QueryString, + Referrer, + UserAgent, + }; + using Crc32Value = uint32_t; - using Entity = std::pair<Crc32Value, std::string_view>; + using Entity = std::tuple<Crc32Value, EntityType, std::string_view>; } namespace scn { diff --git a/src/schema.sql b/src/schema.sql index 48aacb0..eddea2d 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -1,9 +1,11 @@ CREATE TYPE http_verb AS ENUM('GET', 'HEAD', 'OPTIONS', 'TRACE', 'PUT', 'DELETE', 'POST', 'PATCH', 'CONNECT'); CREATE TYPE protocol AS ENUM('HTTP/1.0', 'HTTP/1.1', 'HTTP/1.2', 'HTTP/1.3', 'HTTP/2.0', 'HTTPS/3.0'); +CREATE TYPE entity AS ENUM('host', 'virtual_host', 'path', 'query_string', 'referrer', 'user_agent'); CREATE TABLE entities ( id bigint NOT NULL, value text NOT NULL, + type entity NOT NULL, CONSTRAINT pk_entities PRIMARY KEY(id), CONSTRAINT 
uni_entities_value UNIQUE(value) diff --git a/src/sql/entityInsert.sql b/src/sql/entityInsert.sql index ac443e3..451e660 100644 --- a/src/sql/entityInsert.sql +++ b/src/sql/entityInsert.sql @@ -1 +1 @@ -INSERT INTO entities(id, value) VALUES(?, ?) ON CONFLICT DO NOTHING +INSERT INTO entities(id, type, value) VALUES(?, ?, ?) ON CONFLICT DO NOTHING |