diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-10-06 17:24:19 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-10-06 17:24:19 +0100 |
commit | 4cdef3be8ed0728e577e397f241eca96e2c07680 (patch) | |
tree | 7d1da8129e5182e71f8dc681082b9e2bc05c7a74 | |
parent | Revert "Compat fix for Project2 XML parse changes" (diff) | |
download | gentoobrowse-1.1.1.tar.bz2 gentoobrowse-1.1.1.tar.xz gentoobrowse-1.1.1.zip |
Support specificing the encoding of a file in files table.gentoobrowse-1.1.1
Add helper function to read file contents with optional encoding override.
Use liberally.
-rw-r--r-- | gentoobrowse/console/changelogs.xml | 2 | ||||
-rw-r--r-- | gentoobrowse/console/newsimport.xml | 2 | ||||
-rw-r--r-- | gentoobrowse/console/packageimport.xml | 8 | ||||
-rw-r--r-- | gentoobrowse/console/packagelicenses.xml | 5 | ||||
-rw-r--r-- | gentoobrowse/console/packagemasks.xml | 2 | ||||
-rw-r--r-- | gentoobrowse/console/use.global.xml | 2 | ||||
-rw-r--r-- | gentoobrowse/console/use.grouped.xml | 2 | ||||
-rw-r--r-- | gentoobrowse/console/use.local.xml | 2 | ||||
-rw-r--r-- | gentoobrowse/datasources/schema.sql | 12 |
9 files changed, 23 insertions, 14 deletions
diff --git a/gentoobrowse/console/changelogs.xml b/gentoobrowse/console/changelogs.xml index 16fe5bf..0cdc351 100644 --- a/gentoobrowse/console/changelogs.xml +++ b/gentoobrowse/console/changelogs.xml @@ -23,7 +23,7 @@ REGEXP_MATCHES(r, '(\d{1,2} *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w* 2\d{3}); (.+)? <(.+@[^ >]+)[>\s] (?:[^:]*:)? (.*)', 'i') l FROM ( SELECT repoid, pathparts[1] cat, pathparts[2] pkg, REGEXP_REPLACE(r, '\s+', ' ', 'g') r, ROW_NUMBER() OVER() n - FROM files f, REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(filename), '\n\s*\n') r + FROM files f, REGEXP_SPLIT_TO_TABLE(f.filecontent, '\n\s*\n') r WHERE filetypeid = 2 AND (f.cachedat IS NULL OR f.cachedat != f.moddate)) f ) f, packages p, categories c diff --git a/gentoobrowse/console/newsimport.xml b/gentoobrowse/console/newsimport.xml index 3e9c041..6b6c039 100644 --- a/gentoobrowse/console/newsimport.xml +++ b/gentoobrowse/console/newsimport.xml @@ -20,7 +20,7 @@ (SELECT ARRAY_AGG(REGEXP_REPLACE(PARA, '\s+', ' ', 'g')) FROM (SELECT REGEXP_SPLIT_TO_TABLE(p[2], '\n\n') para) p) body, (SELECT ARRAY_AGG(url) FROM (SELECT (REGEXP_MATCHES(p[3], '(\w+://[^\s]+)', 'g'))[1] url) u) urls FROM ( - SELECT pathparts[3] newsid, REGEXP_MATCHES(PG_READ_FILE(filename), '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p + SELECT pathparts[3] newsid, REGEXP_MATCHES(f.filecontent, '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p FROM files f WHERE f.filetypeid = 11 AND (f.cachedat IS NULL OR f.cachedat != f.moddate) diff --git a/gentoobrowse/console/packageimport.xml b/gentoobrowse/console/packageimport.xml index 0c1d7df..54b1a62 100644 --- a/gentoobrowse/console/packageimport.xml +++ b/gentoobrowse/console/packageimport.xml @@ -8,7 +8,7 @@ LEFT OUTER JOIN ( SELECT f.pathparts[1] AS name, RANK() OVER(PARTITION BY f.pathparts[1] ORDER BY repoid DESC) r, TRIM(REGEXP_REPLACE(CAST((XPATH('/catmetadata/longdescription[@lang="en"]/text()', doc))[1] AS TEXT), '\s+', ' ', 'g')) summary - FROM files f, XMLPARSE(DOCUMENT pg_read_file(filename)) doc + FROM files f, XMLPARSE(DOCUMENT f.filecontent) doc WHERE filetypeid = 10) m ON m.name = c.name </sql> <columns> @@ -26,11 +26,11 @@ FROM CROSSTAB($$ SELECT fileid, a, SUBSTRING(md FROM '=(.*)') v FROM ( - SELECT f.fileid, f.filename, a + SELECT f.fileid, f.filecontent, a FROM files f, unnest(array['DEFINED_PHASES', 'DEPEND', 'DESCRIPTION', 'EAPI', 'HOMEPAGE', 'IUSE', 'KEYWORDS', 'LICENSE', 'PDEPEND', 'PROPERTIES', 'RDEPEND', 'REQUIRED_USE', 'RESTRICT', 'SLOT', 'SRC_URI']) a WHERE filetypeid = 1 - ) f LEFT OUTER JOIN REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(f.filename), '\n') md + ) f LEFT OUTER JOIN REGEXP_SPLIT_TO_TABLE(filecontent, '\n') md ON SPLIT_PART(md, '=' ,1) = a ORDER BY FILEID, a $$) AS p(fileid INT, defined_phases TEXT, depend TEXT, description TEXT, eapi text, homepage TEXT, iuse TEXT, keywords TEXT, license TEXT, pdepend TEXT, properties TEXT, rdepend TEXT, required_use TEXT, @@ -58,7 +58,7 @@ CAST((XPATH('/pkgmetadata/longdescription[not(@lang)]/text()', doc))[1] AS TEXT) longdesc, CAST((XPATH('/pkgmetadata/herd/text()', doc))[1] AS TEXT) herd FROM ( - SELECT f.repoid, categoryid, f.pathparts[2] pkgname, XMLPARSE(DOCUMENT PG_READ_FILE(f.filename)) doc + SELECT f.repoid, categoryid, f.pathparts[2] pkgname, XMLPARSE(DOCUMENT f.filecontent) doc FROM files f, categories c WHERE filetypeid = 4 AND f.pathparts[1] = c.name) x) x diff --git a/gentoobrowse/console/packagelicenses.xml b/gentoobrowse/console/packagelicenses.xml index 80777a7..baa88b0 100644 --- a/gentoobrowse/console/packagelicenses.xml +++ b/gentoobrowse/console/packagelicenses.xml @@ -4,16 +4,15 @@ <project2:sqlmerge name="update" datasource="postgres" targettable="license"> <updatewhere>a.name IN (SELECT o.pathparts[2] FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate)</updatewhere> <sql> - SELECT f.pathparts[2] AS name, PG_READ_FILE(f.filename) legalbumph + SELECT f.pathparts[2] AS name, f.filecontent legalbumph FROM files f WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate) - AND filename NOT LIKE '%.pdf' AND f.filetypeid = 7 </sql> <columns> <name key="true" /> <legalbumph /> - </columns> + </columns> </project2:sqlmerge> <project2:sqltask name="update" datasource="postgres"> <sql> diff --git a/gentoobrowse/console/packagemasks.xml b/gentoobrowse/console/packagemasks.xml index 2e5e2e3..261ee9b 100644 --- a/gentoobrowse/console/packagemasks.xml +++ b/gentoobrowse/console/packagemasks.xml @@ -9,7 +9,7 @@ FROM ( SELECT row_number() over() n, regexp_matches(r, '^# ([^<]+)? ?<(.+?@[^>]+)> \((\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec) \d+)\)\n((?:#[^\n]*\n)+)(.*)$', 'i') m - FROM files f, REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(filename), '\n\s*\n') r + FROM files f, REGEXP_SPLIT_TO_TABLE(f.filecontent, '\n\s*\n') r WHERE filetypeid = 3) m </sql> </project2:sqltask> diff --git a/gentoobrowse/console/use.global.xml b/gentoobrowse/console/use.global.xml index 9ee2cfc..e437514 100644 --- a/gentoobrowse/console/use.global.xml +++ b/gentoobrowse/console/use.global.xml @@ -5,7 +5,7 @@ <sql> SELECT u.ud[1] AS use, u.ud[2] AS description FROM (SELECT regexp_matches(u, '([^ ]+) - (.+)') ud - FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u + FROM files f, regexp_split_to_table(f.filecontent, '\n') u WHERE f.filetypeid = 5) u </sql> <columns> diff --git a/gentoobrowse/console/use.grouped.xml b/gentoobrowse/console/use.grouped.xml index d54bb90..ea93357 100644 --- a/gentoobrowse/console/use.grouped.xml +++ b/gentoobrowse/console/use.grouped.xml @@ -15,7 +15,7 @@ <sql> SELECT ug.usegroupid, u.ud[1] AS use, MIN(u.ud[2]) AS description FROM (SELECT split_part(pathparts[3], '.', 1) AS name, regexp_matches(u, '([^ ]+) - (.*)') ud - FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u + FROM files f, regexp_split_to_table(f.filecontent, '\n') u WHERE f.filetypeid = 9) u, use_groups ug WHERE ug.name = u.name GROUP BY ug.usegroupid, u.ud[1] diff --git a/gentoobrowse/console/use.local.xml b/gentoobrowse/console/use.local.xml index 882ee82..e9e5f52 100644 --- a/gentoobrowse/console/use.local.xml +++ b/gentoobrowse/console/use.local.xml @@ -5,7 +5,7 @@ <sql> SELECT p.packageid, u.ud[3] AS use, u.ud[4] AS description FROM (SELECT regexp_matches(u, '([^/]+)/([^:]+):([^ ]+) - (.+)') ud - FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u + FROM files f, regexp_split_to_table(f.filecontent, '\n') u WHERE f.filetypeid = 6) u, categories c, packages p WHERE c.name = u.ud[1] AND p.name = u.ud[2] diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql index dc6919d..54e1351 100644 --- a/gentoobrowse/datasources/schema.sql +++ b/gentoobrowse/datasources/schema.sql @@ -331,9 +331,19 @@ CREATE TABLE files ( filetypeid integer NOT NULL, repoid integer NOT NULL, filesize integer NOT NULL, - pathparts text[] NOT NULL + pathparts text[] NOT NULL, + encoding text ); ALTER TABLE files OWNER TO gentoo; +-- Name: filecontent(files); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo +CREATE FUNCTION filecontent(f files) RETURNS text + LANGUAGE plpgsql IMMUTABLE + AS $$ +begin + return CONVERT_FROM(PG_READ_BINARY_FILE(f.filename), COALESCE(f.encoding, 'utf-8')); +end +$$; +ALTER FUNCTION gentoobrowse.filecontent(f files) OWNER TO gentoo; -- Name: filetypes; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: CREATE TABLE filetypes ( filetypeid integer NOT NULL, |