1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
CREATE TYPE http_verb AS ENUM(
'GET',
'HEAD',
'OPTIONS',
'TRACE',
'PUT',
'DELETE',
'POST',
'PATCH',
'CONNECT',
'PROPFIND'
);
CREATE TYPE protocol AS ENUM(
'HTTP/1.0',
'HTTP/1.1',
'HTTP/1.2',
'HTTP/1.3',
'HTTP/2.0',
'HTTPS/3.0'
);
CREATE TYPE entity AS ENUM(
'host',
'virtual_host',
'path',
'query_string',
'referrer',
'user_agent',
'unparsable_line',
'uninsertable_line',
'content_type'
);
CREATE TABLE entities(
id integer GENERATED ALWAYS AS IDENTITY,
value text NOT NULL,
type entity NOT NULL,
detail jsonb,
CONSTRAINT pk_entities PRIMARY KEY (id)
);
CREATE UNIQUE INDEX uni_entities_value ON entities(MD5(value));
CREATE OR REPLACE FUNCTION entity(newValue text, newType entity)
RETURNS integer
AS $$
DECLARE
now timestamp without time zone;
recid integer;
BEGIN
IF newValue IS NULL THEN
RETURN NULL;
END IF;
INSERT INTO entities(value, type)
SELECT
newValue,
newType
WHERE
NOT EXISTS (
SELECT
FROM
entities
WHERE
md5(value) = md5(newValue))
ON CONFLICT
DO NOTHING
RETURNING
id INTO recid;
IF recid IS NULL THEN
SELECT
id INTO recid
FROM
entities
WHERE
md5(value) = md5(newValue);
END IF;
RETURN recid;
END;
$$
LANGUAGE plpgSQL
RETURNS NULL ON NULL INPUT;
CREATE TABLE access_log(
id bigint GENERATED ALWAYS AS IDENTITY,
hostname integer NOT NULL,
virtual_host integer NOT NULL,
remoteip inet NOT NULL,
request_time timestamp(6) NOT NULL,
method http_verb NOT NULL,
protocol protocol NOT NULL,
path integer NOT NULL,
query_string integer,
status smallint NOT NULL,
size integer NOT NULL,
duration interval second(6) NOT NULL,
referrer integer,
user_agent integer,
content_type integer,
CONSTRAINT pk_access_log PRIMARY KEY (id),
CONSTRAINT fk_access_log_hostname FOREIGN KEY (hostname) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_virtualhost FOREIGN KEY (virtual_host) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_path FOREIGN KEY (path) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_query_string FOREIGN KEY (query_string) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_referrer FOREIGN KEY (referrer) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_user_agent FOREIGN KEY (user_agent) REFERENCES entities(id) ON UPDATE CASCADE,
CONSTRAINT fk_access_log_content_type FOREIGN KEY (content_type) REFERENCES entities(id) ON UPDATE CASCADE
);
CREATE OR REPLACE VIEW access_log_view AS
SELECT
l.id,
h.id hostname_id,
h.value hostname,
v.id virtual_host_id,
v.value virtual_host,
remoteip::text,
request_time,
method,
protocol,
p.id path_id,
p.value path,
q.id query_string_id,
q.value query_string,
status,
size,
duration,
r.id referrer_id,
r.value referrer,
u.id user_agent_id,
u.value user_agent,
c.id content_type_id,
c.value content_type
FROM
access_log l
LEFT OUTER JOIN entities h ON l.hostname = h.id
LEFT OUTER JOIN entities v ON l.virtual_host = v.id
LEFT OUTER JOIN entities p ON l.path = p.id
LEFT OUTER JOIN entities q ON l.query_string = q.id
LEFT OUTER JOIN entities r ON l.referrer = r.id
LEFT OUTER JOIN entities u ON l.user_agent = u.id
LEFT OUTER JOIN entities c ON l.content_type = c.id;
|