diff options
| author | Dan Goodliffe <dan@randomdan.homeip.net> | 2026-01-18 01:36:06 +0000 |
|---|---|---|
| committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2026-01-18 01:36:06 +0000 |
| commit | 04acfa679fd846ac829ded5562093b3766c85154 (patch) | |
| tree | a9625eb96c8c4ed246b1f4d172c06d85df577dfe /src | |
| parent | 34051da2f27ffa40d0b6d20ae891a497fe73bfe5 (diff) | |
| download | webstat-main.tar.bz2 webstat-main.tar.xz webstat-main.zip | |
Diffstat (limited to 'src')
| -rw-r--r-- | src/ingestor.cpp | 11 | ||||
| -rw-r--r-- | src/ingestor.hpp | 2 | ||||
| -rw-r--r-- | src/logTypes.hpp | 1 | ||||
| -rw-r--r-- | src/schema.sql | 14 | ||||
| -rw-r--r-- | src/sql/accessLogInsert.sql | 4 |
5 files changed, 21 insertions, 11 deletions
diff --git a/src/ingestor.cpp b/src/ingestor.cpp index 81642be..da39c59 100644 --- a/src/ingestor.cpp +++ b/src/ingestor.cpp @@ -54,7 +54,8 @@ namespace WebStat { { static constexpr std::tuple<ToEntity<EntityType::VirtualHost>, std::identity, std::identity, std::identity, ToEntity<EntityType::Path>, ToEntity<EntityType::QueryString>, std::identity, std::identity, - std::identity, std::identity, ToEntity<EntityType::Referrer>, ToEntity<EntityType::UserAgent>> + std::identity, std::identity, ToEntity<EntityType::Referrer>, ToEntity<EntityType::UserAgent>, + ToEntity<EntityType::ContentType>> ENTITY_TYPE_MAP; static constexpr size_t VALUE_COUNT = std::tuple_size_v<Ingestor::ScanValues>; static_assert(VALUE_COUNT == std::tuple_size_v<decltype(ENTITY_TYPE_MAP)>); @@ -99,8 +100,9 @@ namespace WebStat { unsigned int, // size : %B : 1234 unsigned int, // duration : %D : 1234 CLFString, // referrer : "%{Referer}i" : "https://google.com/whatever" or "-" - CLFString // user_agent : "%{User-agent}i" : "Chromium v123.4" or "-" - >(input, R"({} {} {} {:[A-Z]} {} {} {} {} {} {} {} {})"); + CLFString, // user_agent : "%{User-agent}i" : "Chromium v123.4" or "-" + CLFString // content_type : "%{Content-type}o" : "text/plain" or "-" + >(input, R"({} {} {} {:[A-Z]} {} {} {} {} {} {} {} {} {})"); } void @@ -321,7 +323,7 @@ namespace WebStat { Ingestor::NewEntityIds Ingestor::storeEntities(DB::Connection * dbconn, const std::span<const std::optional<Entity>> values) const { - static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 8> + static constexpr std::array<std::pair<std::string_view, void (Ingestor::*)(const Entity &) const>, 9> ENTITY_TYPE_VALUES {{ {"host", nullptr}, {"virtual_host", nullptr}, @@ -331,6 +333,7 @@ namespace WebStat { {"user_agent", &Ingestor::onNewUserAgent}, {"unparsable_line", nullptr}, {"uninsertable_line", nullptr}, + {"content_type", nullptr}, }}; auto insert = dbconn->modify(SQL::ENTITY_INSERT, 
SQL::ENTITY_INSERT_OPTS); diff --git a/src/ingestor.hpp b/src/ingestor.hpp index a19c8ec..67a7a15 100644 --- a/src/ingestor.hpp +++ b/src/ingestor.hpp @@ -43,7 +43,7 @@ namespace WebStat { using ScanResult = decltype(scn::scan<std::string_view, std::string_view, uint64_t, std::string_view, QuotedString, QueryString, std::string_view, unsigned short, unsigned int, unsigned int, CLFString, - CLFString>(std::declval<std::string_view>(), "")); + CLFString, CLFString>(std::declval<std::string_view>(), "")); using ScanValues = std::remove_cvref_t<decltype(std::declval<WebStat::Ingestor::ScanResult>()->values())>; [[nodiscard]] static ScanResult scanLogLine(std::string_view); diff --git a/src/logTypes.hpp b/src/logTypes.hpp index f9395d1..71393b2 100644 --- a/src/logTypes.hpp +++ b/src/logTypes.hpp @@ -31,6 +31,7 @@ namespace WebStat { UserAgent, UnparsableLine, UninsertableLine, + ContentType, }; using Crc32Value = uint32_t; diff --git a/src/schema.sql b/src/schema.sql index 7648b79..8008b3c 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -28,7 +28,8 @@ CREATE TYPE entity AS ENUM( 'referrer', 'user_agent', 'unparsable_line', - 'uninsertable_line' + 'uninsertable_line', + 'content_type' ); CREATE TABLE entities( @@ -55,13 +56,15 @@ CREATE TABLE access_log( duration interval second(6) NOT NULL, referrer oid, user_agent oid, + content_type oid, CONSTRAINT pk_access_log PRIMARY KEY (id), CONSTRAINT fk_access_log_hostname FOREIGN KEY (hostname) REFERENCES entities(id), CONSTRAINT fk_access_log_virtualhost FOREIGN KEY (virtual_host) REFERENCES entities(id), CONSTRAINT fk_access_log_path FOREIGN KEY (path) REFERENCES entities(id), CONSTRAINT fk_access_log_query_string FOREIGN KEY (query_string) REFERENCES entities(id), CONSTRAINT fk_access_log_referrer FOREIGN KEY (referrer) REFERENCES entities(id), - CONSTRAINT fk_access_log_user_agent FOREIGN KEY (user_agent) REFERENCES entities(id) + CONSTRAINT fk_access_log_user_agent FOREIGN KEY (user_agent) REFERENCES entities(id), + 
CONSTRAINT fk_access_log_content_type FOREIGN KEY (content_type) REFERENCES entities(id) ); CREATE OR REPLACE VIEW access_log_view AS @@ -85,7 +88,9 @@ SELECT r.id referrer_id, r.value referrer, u.id user_agent_id, - u.value user_agent + u.value user_agent, + c.id content_type_id, + c.value content_type FROM access_log l LEFT OUTER JOIN entities h ON l.hostname = h.id @@ -93,4 +98,5 @@ FROM LEFT OUTER JOIN entities p ON l.path = p.id LEFT OUTER JOIN entities q ON l.query_string = q.id LEFT OUTER JOIN entities r ON l.referrer = r.id - LEFT OUTER JOIN entities u ON l.user_agent = u.id; + LEFT OUTER JOIN entities u ON l.user_agent = u.id + LEFT OUTER JOIN entities c ON l.content_type = c.id; diff --git a/src/sql/accessLogInsert.sql b/src/sql/accessLogInsert.sql index 42f809b..518045e 100644 --- a/src/sql/accessLogInsert.sql +++ b/src/sql/accessLogInsert.sql @@ -1,3 +1,3 @@ INSERT INTO access_log(hostname, virtual_host, remoteip, request_time, method, path, query_string, protocol, status, - size, duration, referrer, user_agent) - VALUES (?, ?, ?, TO_TIMESTAMP(? / 1000000.0) at time zone 'utc', ?, ?, ?, ?, ?, ?, ? * '1us'::interval, ?, ?) + size, duration, referrer, user_agent, content_type) + VALUES (?, ?, ?, TO_TIMESTAMP(? / 1000000.0) at time zone 'utc', ?, ?, ?, ?, ?, ?, ? * '1us'::interval, ?, ?, ?) |
