1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
CREATE TYPE http_verb AS ENUM(
'GET',
'HEAD',
'OPTIONS',
'TRACE',
'PUT',
'DELETE',
'POST',
'PATCH',
'CONNECT',
'PROPFIND'
);
CREATE TYPE protocol AS ENUM(
'HTTP/1.0',
'HTTP/1.1',
'HTTP/1.2',
'HTTP/1.3',
'HTTP/2.0',
'HTTPS/3.0'
);
CREATE TYPE entity AS ENUM(
'host',
'virtual_host',
'path',
'query_string',
'referrer',
'user_agent',
'unparsable_line',
'uninsertable_line',
'content_type'
);
CREATE TABLE entities(
id integer GENERATED ALWAYS AS IDENTITY,
value text NOT NULL,
type entity NOT NULL,
detail jsonb,
CONSTRAINT pk_entities PRIMARY KEY (id)
);
CREATE UNIQUE INDEX uni_entities_value ON entities(MD5(value));
CREATE OR REPLACE FUNCTION entity(newValue text, newType entity)
RETURNS TABLE(
id integer,
nulldetail boolean
)
AS $$
DECLARE
now timestamp without time zone;
recid integer;
nulldetail boolean;
BEGIN
IF newValue IS NULL THEN
RETURN query
VALUES (NULL,
NULL);
END IF;
INSERT INTO entities(value, type)
SELECT
newValue,
newType
WHERE
NOT EXISTS (
SELECT
FROM
entities
WHERE
md5(value) = md5(newValue))
ON CONFLICT
DO NOTHING
RETURNING
entities.id,
entities.detail IS NULL INTO recid,
nulldetail;
IF recid IS NULL THEN
RETURN query
SELECT
e.id,
e.detail IS NULL
FROM
entities e
WHERE
md5(e.value) = md5(newValue);
END IF;
RETURN query
VALUES (recid,
nulldetail);
END;
$$
LANGUAGE plpgSQL
RETURNS NULL ON NULL INPUT;
CREATE TABLE access_log(
id bigint GENERATED ALWAYS AS IDENTITY,
hostname integer NOT NULL,
virtual_host integer NOT NULL,
remoteip inet NOT NULL,
request_time timestamp(6) NOT NULL,
method http_verb NOT NULL,
protocol protocol NOT NULL,
path integer NOT NULL,
query_string integer,
status smallint NOT NULL,
size integer NOT NULL,
duration interval second(6) NOT NULL,
referrer integer,
user_agent integer,
content_type integer,
CONSTRAINT pk_access_log PRIMARY KEY (id),
CONSTRAINT fk_access_log_hostname FOREIGN KEY (hostname) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_virtualhost FOREIGN KEY (virtual_host) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_path FOREIGN KEY (path) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_query_string FOREIGN KEY (query_string) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_referrer FOREIGN KEY (referrer) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_user_agent FOREIGN KEY (user_agent) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_content_type FOREIGN KEY (content_type) REFERENCES entities(id) ON UPDATE CASCADE
);
CREATE INDEX idx_access_log_content_type ON access_log(content_type);
CREATE INDEX idx_access_log_hostname ON access_log(hostname);
CREATE INDEX idx_access_log_path ON access_log(path);
CREATE INDEX idx_access_log_query_string ON access_log(query_string);
CREATE INDEX idx_access_log_referrer ON access_log(referrer);
CREATE INDEX idx_access_log_request_time ON access_log USING BRIN(request_time) WITH (autosummarize = TRUE);
CREATE INDEX idx_access_log_user_agent ON access_log(user_agent);
CREATE INDEX idx_access_log_virtual_host ON access_log(virtual_host);
CREATE OR REPLACE VIEW access_log_view AS
SELECT
l.id,
h.id hostname_id,
h.value hostname,
v.id virtual_host_id,
v.value virtual_host,
remoteip::text,
request_time,
method,
protocol,
p.id path_id,
p.value path,
q.id query_string_id,
q.value query_string,
status,
size,
duration,
r.id referrer_id,
r.value referrer,
u.id user_agent_id,
u.value user_agent,
c.id content_type_id,
c.value content_type
FROM
access_log l
LEFT OUTER JOIN entities h ON l.hostname = h.id
LEFT OUTER JOIN entities v ON l.virtual_host = v.id
LEFT OUTER JOIN entities p ON l.path = p.id
LEFT OUTER JOIN entities q ON l.query_string = q.id
LEFT OUTER JOIN entities r ON l.referrer = r.id
LEFT OUTER JOIN entities u ON l.user_agent = u.id
LEFT OUTER JOIN entities c ON l.content_type = c.id;
|