summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Goodliffe <dan@randomdan.homeip.net>2015-10-06 17:24:19 +0100
committerDan Goodliffe <dan@randomdan.homeip.net>2015-10-06 17:24:19 +0100
commit4cdef3be8ed0728e577e397f241eca96e2c07680 (patch)
tree7d1da8129e5182e71f8dc681082b9e2bc05c7a74
parentRevert "Compat fix for Project2 XML parse changes" (diff)
downloadgentoobrowse-1.1.1.tar.bz2
gentoobrowse-1.1.1.tar.xz
gentoobrowse-1.1.1.zip
Support specificing the encoding of a file in files table.gentoobrowse-1.1.1
Add helper function to read file contents with optional encoding override. Use liberally.
-rw-r--r--gentoobrowse/console/changelogs.xml2
-rw-r--r--gentoobrowse/console/newsimport.xml2
-rw-r--r--gentoobrowse/console/packageimport.xml8
-rw-r--r--gentoobrowse/console/packagelicenses.xml5
-rw-r--r--gentoobrowse/console/packagemasks.xml2
-rw-r--r--gentoobrowse/console/use.global.xml2
-rw-r--r--gentoobrowse/console/use.grouped.xml2
-rw-r--r--gentoobrowse/console/use.local.xml2
-rw-r--r--gentoobrowse/datasources/schema.sql12
9 files changed, 23 insertions, 14 deletions
diff --git a/gentoobrowse/console/changelogs.xml b/gentoobrowse/console/changelogs.xml
index 16fe5bf..0cdc351 100644
--- a/gentoobrowse/console/changelogs.xml
+++ b/gentoobrowse/console/changelogs.xml
@@ -23,7 +23,7 @@
REGEXP_MATCHES(r, '(\d{1,2} *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w* 2\d{3}); (.+)? &lt;(.+@[^ &gt;]+)[&gt;\s] (?:[^:]*:)? (.*)', 'i') l
FROM (
SELECT repoid, pathparts[1] cat, pathparts[2] pkg, REGEXP_REPLACE(r, '\s+', ' ', 'g') r, ROW_NUMBER() OVER() n
- FROM files f, REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(filename), '\n\s*\n') r
+ FROM files f, REGEXP_SPLIT_TO_TABLE(f.filecontent, '\n\s*\n') r
WHERE filetypeid = 2
AND (f.cachedat IS NULL OR f.cachedat != f.moddate)) f
) f, packages p, categories c
diff --git a/gentoobrowse/console/newsimport.xml b/gentoobrowse/console/newsimport.xml
index 3e9c041..6b6c039 100644
--- a/gentoobrowse/console/newsimport.xml
+++ b/gentoobrowse/console/newsimport.xml
@@ -20,7 +20,7 @@
(SELECT ARRAY_AGG(REGEXP_REPLACE(PARA, '\s+', ' ', 'g')) FROM (SELECT REGEXP_SPLIT_TO_TABLE(p[2], '\n\n') para) p) body,
(SELECT ARRAY_AGG(url) FROM (SELECT (REGEXP_MATCHES(p[3], '(\w+://[^\s]+)', 'g'))[1] url) u) urls
FROM (
- SELECT pathparts[3] newsid, REGEXP_MATCHES(PG_READ_FILE(filename), '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p
+ SELECT pathparts[3] newsid, REGEXP_MATCHES(f.filecontent, '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p
FROM files f
WHERE f.filetypeid = 11
AND (f.cachedat IS NULL OR f.cachedat != f.moddate)
diff --git a/gentoobrowse/console/packageimport.xml b/gentoobrowse/console/packageimport.xml
index 0c1d7df..54b1a62 100644
--- a/gentoobrowse/console/packageimport.xml
+++ b/gentoobrowse/console/packageimport.xml
@@ -8,7 +8,7 @@
LEFT OUTER JOIN (
SELECT f.pathparts[1] AS name, RANK() OVER(PARTITION BY f.pathparts[1] ORDER BY repoid DESC) r,
TRIM(REGEXP_REPLACE(CAST((XPATH('/catmetadata/longdescription[@lang="en"]/text()', doc))[1] AS TEXT), '\s+', ' ', 'g')) summary
- FROM files f, XMLPARSE(DOCUMENT pg_read_file(filename)) doc
+ FROM files f, XMLPARSE(DOCUMENT f.filecontent) doc
WHERE filetypeid = 10) m ON m.name = c.name
</sql>
<columns>
@@ -26,11 +26,11 @@
FROM CROSSTAB($$
SELECT fileid, a, SUBSTRING(md FROM '=(.*)') v
FROM (
- SELECT f.fileid, f.filename, a
+ SELECT f.fileid, f.filecontent, a
FROM files f, unnest(array['DEFINED_PHASES', 'DEPEND', 'DESCRIPTION', 'EAPI', 'HOMEPAGE', 'IUSE', 'KEYWORDS',
'LICENSE', 'PDEPEND', 'PROPERTIES', 'RDEPEND', 'REQUIRED_USE', 'RESTRICT', 'SLOT', 'SRC_URI']) a
WHERE filetypeid = 1
- ) f LEFT OUTER JOIN REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(f.filename), '\n') md
+ ) f LEFT OUTER JOIN REGEXP_SPLIT_TO_TABLE(filecontent, '\n') md
ON SPLIT_PART(md, '=' ,1) = a
ORDER BY FILEID, a $$) AS p(fileid INT, defined_phases TEXT, depend TEXT, description TEXT, eapi text, homepage TEXT,
iuse TEXT, keywords TEXT, license TEXT, pdepend TEXT, properties TEXT, rdepend TEXT, required_use TEXT,
@@ -58,7 +58,7 @@
CAST((XPATH('/pkgmetadata/longdescription[not(@lang)]/text()', doc))[1] AS TEXT) longdesc,
CAST((XPATH('/pkgmetadata/herd/text()', doc))[1] AS TEXT) herd
FROM (
- SELECT f.repoid, categoryid, f.pathparts[2] pkgname, XMLPARSE(DOCUMENT PG_READ_FILE(f.filename)) doc
+ SELECT f.repoid, categoryid, f.pathparts[2] pkgname, XMLPARSE(DOCUMENT f.filecontent) doc
FROM files f, categories c
WHERE filetypeid = 4
AND f.pathparts[1] = c.name) x) x
diff --git a/gentoobrowse/console/packagelicenses.xml b/gentoobrowse/console/packagelicenses.xml
index 80777a7..baa88b0 100644
--- a/gentoobrowse/console/packagelicenses.xml
+++ b/gentoobrowse/console/packagelicenses.xml
@@ -4,16 +4,15 @@
<project2:sqlmerge name="update" datasource="postgres" targettable="license">
<updatewhere>a.name IN (SELECT o.pathparts[2] FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate)</updatewhere>
<sql>
- SELECT f.pathparts[2] AS name, PG_READ_FILE(f.filename) legalbumph
+ SELECT f.pathparts[2] AS name, f.filecontent legalbumph
FROM files f
WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate)
- AND filename NOT LIKE '%.pdf'
AND f.filetypeid = 7
</sql>
<columns>
<name key="true" />
<legalbumph />
- </columns>
+ </columns>
</project2:sqlmerge>
<project2:sqltask name="update" datasource="postgres">
<sql>
diff --git a/gentoobrowse/console/packagemasks.xml b/gentoobrowse/console/packagemasks.xml
index 2e5e2e3..261ee9b 100644
--- a/gentoobrowse/console/packagemasks.xml
+++ b/gentoobrowse/console/packagemasks.xml
@@ -9,7 +9,7 @@
FROM (
SELECT row_number() over() n,
regexp_matches(r, '^# ([^&lt;]+)? ?&lt;(.+?@[^&gt;]+)&gt; \((\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec) \d+)\)\n((?:#[^\n]*\n)+)(.*)$', 'i') m
- FROM files f, REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(filename), '\n\s*\n') r
+ FROM files f, REGEXP_SPLIT_TO_TABLE(f.filecontent, '\n\s*\n') r
WHERE filetypeid = 3) m
</sql>
</project2:sqltask>
diff --git a/gentoobrowse/console/use.global.xml b/gentoobrowse/console/use.global.xml
index 9ee2cfc..e437514 100644
--- a/gentoobrowse/console/use.global.xml
+++ b/gentoobrowse/console/use.global.xml
@@ -5,7 +5,7 @@
<sql>
SELECT u.ud[1] AS use, u.ud[2] AS description
FROM (SELECT regexp_matches(u, '([^ ]+) - (.+)') ud
- FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u
+ FROM files f, regexp_split_to_table(f.filecontent, '\n') u
WHERE f.filetypeid = 5) u
</sql>
<columns>
diff --git a/gentoobrowse/console/use.grouped.xml b/gentoobrowse/console/use.grouped.xml
index d54bb90..ea93357 100644
--- a/gentoobrowse/console/use.grouped.xml
+++ b/gentoobrowse/console/use.grouped.xml
@@ -15,7 +15,7 @@
<sql>
SELECT ug.usegroupid, u.ud[1] AS use, MIN(u.ud[2]) AS description
FROM (SELECT split_part(pathparts[3], '.', 1) AS name, regexp_matches(u, '([^ ]+) - (.*)') ud
- FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u
+ FROM files f, regexp_split_to_table(f.filecontent, '\n') u
WHERE f.filetypeid = 9) u, use_groups ug
WHERE ug.name = u.name
GROUP BY ug.usegroupid, u.ud[1]
diff --git a/gentoobrowse/console/use.local.xml b/gentoobrowse/console/use.local.xml
index 882ee82..e9e5f52 100644
--- a/gentoobrowse/console/use.local.xml
+++ b/gentoobrowse/console/use.local.xml
@@ -5,7 +5,7 @@
<sql>
SELECT p.packageid, u.ud[3] AS use, u.ud[4] AS description
FROM (SELECT regexp_matches(u, '([^/]+)/([^:]+):([^ ]+) - (.+)') ud
- FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u
+ FROM files f, regexp_split_to_table(f.filecontent, '\n') u
WHERE f.filetypeid = 6) u, categories c, packages p
WHERE c.name = u.ud[1]
AND p.name = u.ud[2]
diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql
index dc6919d..54e1351 100644
--- a/gentoobrowse/datasources/schema.sql
+++ b/gentoobrowse/datasources/schema.sql
@@ -331,9 +331,19 @@ CREATE TABLE files (
filetypeid integer NOT NULL,
repoid integer NOT NULL,
filesize integer NOT NULL,
- pathparts text[] NOT NULL
+ pathparts text[] NOT NULL,
+ encoding text
);
ALTER TABLE files OWNER TO gentoo;
+-- Name: filecontent(files); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo
+CREATE FUNCTION filecontent(f files) RETURNS text
+ LANGUAGE plpgsql IMMUTABLE
+ AS $$
+begin
+ return CONVERT_FROM(PG_READ_BINARY_FILE(f.filename), COALESCE(f.encoding, 'utf-8'));
+end
+$$;
+ALTER FUNCTION gentoobrowse.filecontent(f files) OWNER TO gentoo;
-- Name: filetypes; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:
CREATE TABLE filetypes (
filetypeid integer NOT NULL,