diff options
| author | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-10-06 17:24:19 +0100 | 
|---|---|---|
| committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-10-06 17:24:19 +0100 | 
| commit | 4cdef3be8ed0728e577e397f241eca96e2c07680 (patch) | |
| tree | 7d1da8129e5182e71f8dc681082b9e2bc05c7a74 | |
| parent | Revert "Compat fix for Project2 XML parse changes" (diff) | |
| download | gentoobrowse-4cdef3be8ed0728e577e397f241eca96e2c07680.tar.bz2 gentoobrowse-4cdef3be8ed0728e577e397f241eca96e2c07680.tar.xz gentoobrowse-4cdef3be8ed0728e577e397f241eca96e2c07680.zip | |
Support specifying the encoding of a file in files table. gentoobrowse-1.1.1
Add helper function to read file contents with optional encoding override.
Use liberally.
| -rw-r--r-- | gentoobrowse/console/changelogs.xml | 2 | ||||
| -rw-r--r-- | gentoobrowse/console/newsimport.xml | 2 | ||||
| -rw-r--r-- | gentoobrowse/console/packageimport.xml | 8 | ||||
| -rw-r--r-- | gentoobrowse/console/packagelicenses.xml | 5 | ||||
| -rw-r--r-- | gentoobrowse/console/packagemasks.xml | 2 | ||||
| -rw-r--r-- | gentoobrowse/console/use.global.xml | 2 | ||||
| -rw-r--r-- | gentoobrowse/console/use.grouped.xml | 2 | ||||
| -rw-r--r-- | gentoobrowse/console/use.local.xml | 2 | ||||
| -rw-r--r-- | gentoobrowse/datasources/schema.sql | 12 | 
9 files changed, 23 insertions, 14 deletions
| diff --git a/gentoobrowse/console/changelogs.xml b/gentoobrowse/console/changelogs.xml index 16fe5bf..0cdc351 100644 --- a/gentoobrowse/console/changelogs.xml +++ b/gentoobrowse/console/changelogs.xml @@ -23,7 +23,7 @@  					REGEXP_MATCHES(r, '(\d{1,2} *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w* 2\d{3}); (.+)? <(.+@[^ >]+)[>\s] (?:[^:]*:)? (.*)', 'i') l  				FROM (  					SELECT repoid, pathparts[1] cat, pathparts[2] pkg, REGEXP_REPLACE(r, '\s+', ' ', 'g') r, ROW_NUMBER() OVER() n -					FROM files f, REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(filename), '\n\s*\n') r +					FROM files f, REGEXP_SPLIT_TO_TABLE(f.filecontent, '\n\s*\n') r  					WHERE filetypeid = 2  					AND (f.cachedat IS NULL OR f.cachedat != f.moddate)) f  				) f, packages p, categories c diff --git a/gentoobrowse/console/newsimport.xml b/gentoobrowse/console/newsimport.xml index 3e9c041..6b6c039 100644 --- a/gentoobrowse/console/newsimport.xml +++ b/gentoobrowse/console/newsimport.xml @@ -20,7 +20,7 @@  				(SELECT ARRAY_AGG(REGEXP_REPLACE(PARA, '\s+', ' ', 'g')) FROM (SELECT REGEXP_SPLIT_TO_TABLE(p[2], '\n\n') para) p) body,  				(SELECT ARRAY_AGG(url) FROM (SELECT (REGEXP_MATCHES(p[3], '(\w+://[^\s]+)', 'g'))[1] url) u) urls  			FROM ( -				SELECT pathparts[3] newsid, REGEXP_MATCHES(PG_READ_FILE(filename), '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p +				SELECT pathparts[3] newsid, REGEXP_MATCHES(f.filecontent, '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p  				FROM files f  				WHERE f.filetypeid = 11  				AND (f.cachedat IS NULL OR f.cachedat != f.moddate) diff --git a/gentoobrowse/console/packageimport.xml b/gentoobrowse/console/packageimport.xml index 0c1d7df..54b1a62 100644 --- a/gentoobrowse/console/packageimport.xml +++ b/gentoobrowse/console/packageimport.xml @@ -8,7 +8,7 @@  				LEFT OUTER JOIN (  					SELECT f.pathparts[1] AS name, RANK() OVER(PARTITION BY f.pathparts[1] ORDER BY repoid DESC) r,  						
TRIM(REGEXP_REPLACE(CAST((XPATH('/catmetadata/longdescription[@lang="en"]/text()', doc))[1] AS TEXT), '\s+', ' ', 'g')) summary -					FROM files f, XMLPARSE(DOCUMENT pg_read_file(filename)) doc +					FROM files f, XMLPARSE(DOCUMENT f.filecontent) doc  					WHERE filetypeid = 10) m ON m.name = c.name  		</sql>  		<columns> @@ -26,11 +26,11 @@  				FROM CROSSTAB($$  					SELECT fileid, a, SUBSTRING(md FROM '=(.*)') v  					FROM ( -							SELECT f.fileid, f.filename, a +							SELECT f.fileid, f.filecontent, a  							FROM files f, unnest(array['DEFINED_PHASES', 'DEPEND', 'DESCRIPTION', 'EAPI', 'HOMEPAGE', 'IUSE', 'KEYWORDS',  								'LICENSE', 'PDEPEND', 'PROPERTIES', 'RDEPEND', 'REQUIRED_USE', 'RESTRICT', 'SLOT', 'SRC_URI']) a  							WHERE filetypeid = 1 -							) f LEFT OUTER JOIN REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(f.filename), '\n') md +							) f LEFT OUTER JOIN REGEXP_SPLIT_TO_TABLE(filecontent, '\n') md  									ON SPLIT_PART(md, '=' ,1) = a  					ORDER BY FILEID, a $$) AS p(fileid INT, defined_phases TEXT, depend TEXT, description TEXT, eapi text, homepage TEXT,  						iuse TEXT, keywords TEXT, license TEXT, pdepend TEXT, properties TEXT, rdepend TEXT, required_use TEXT, @@ -58,7 +58,7 @@  									CAST((XPATH('/pkgmetadata/longdescription[not(@lang)]/text()', doc))[1] AS TEXT) longdesc,  									CAST((XPATH('/pkgmetadata/herd/text()', doc))[1] AS TEXT) herd  								FROM ( -									SELECT f.repoid, categoryid, f.pathparts[2] pkgname, XMLPARSE(DOCUMENT PG_READ_FILE(f.filename)) doc +									SELECT f.repoid, categoryid, f.pathparts[2] pkgname, XMLPARSE(DOCUMENT f.filecontent) doc  									FROM files f, categories c  									WHERE filetypeid = 4  									AND f.pathparts[1] = c.name) x) x diff --git a/gentoobrowse/console/packagelicenses.xml b/gentoobrowse/console/packagelicenses.xml index 80777a7..baa88b0 100644 --- a/gentoobrowse/console/packagelicenses.xml +++ b/gentoobrowse/console/packagelicenses.xml @@ -4,16 +4,15 @@  	<project2:sqlmerge 
name="update" datasource="postgres" targettable="license">  		<updatewhere>a.name IN (SELECT o.pathparts[2] FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate)</updatewhere>  		<sql> -			SELECT f.pathparts[2] AS name, PG_READ_FILE(f.filename) legalbumph +			SELECT f.pathparts[2] AS name, f.filecontent legalbumph  			FROM files f  			WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate) -			AND filename NOT LIKE '%.pdf'  			AND f.filetypeid = 7  		</sql>  		<columns>  			<name key="true" />  			<legalbumph /> -		</columns>  +		</columns>  	</project2:sqlmerge>  	<project2:sqltask name="update" datasource="postgres">  		<sql> diff --git a/gentoobrowse/console/packagemasks.xml b/gentoobrowse/console/packagemasks.xml index 2e5e2e3..261ee9b 100644 --- a/gentoobrowse/console/packagemasks.xml +++ b/gentoobrowse/console/packagemasks.xml @@ -9,7 +9,7 @@  			FROM (  				SELECT row_number() over() n,  					regexp_matches(r, '^# ([^<]+)? ?<(.+?@[^>]+)> \((\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec) \d+)\)\n((?:#[^\n]*\n)+)(.*)$', 'i') m -				FROM files f, REGEXP_SPLIT_TO_TABLE(PG_READ_FILE(filename), '\n\s*\n') r +				FROM files f, REGEXP_SPLIT_TO_TABLE(f.filecontent, '\n\s*\n') r  				WHERE filetypeid = 3) m  		</sql>  	</project2:sqltask> diff --git a/gentoobrowse/console/use.global.xml b/gentoobrowse/console/use.global.xml index 9ee2cfc..e437514 100644 --- a/gentoobrowse/console/use.global.xml +++ b/gentoobrowse/console/use.global.xml @@ -5,7 +5,7 @@  		<sql>  			SELECT u.ud[1] AS use, u.ud[2] AS description  			FROM (SELECT regexp_matches(u, '([^ ]+) - (.+)') ud -				FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u +				FROM files f, regexp_split_to_table(f.filecontent, '\n') u  				WHERE f.filetypeid = 5) u  		</sql>  		<columns> diff --git a/gentoobrowse/console/use.grouped.xml b/gentoobrowse/console/use.grouped.xml index d54bb90..ea93357 100644 --- a/gentoobrowse/console/use.grouped.xml +++ 
b/gentoobrowse/console/use.grouped.xml @@ -15,7 +15,7 @@  		<sql>  			SELECT ug.usegroupid, u.ud[1] AS use, MIN(u.ud[2]) AS description  			FROM (SELECT split_part(pathparts[3], '.', 1) AS name, regexp_matches(u, '([^ ]+) - (.*)') ud -				FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u +				FROM files f, regexp_split_to_table(f.filecontent, '\n') u  				WHERE f.filetypeid = 9) u, use_groups ug  			WHERE ug.name = u.name  			GROUP BY ug.usegroupid, u.ud[1] diff --git a/gentoobrowse/console/use.local.xml b/gentoobrowse/console/use.local.xml index 882ee82..e9e5f52 100644 --- a/gentoobrowse/console/use.local.xml +++ b/gentoobrowse/console/use.local.xml @@ -5,7 +5,7 @@  		<sql>  			SELECT p.packageid, u.ud[3] AS use, u.ud[4] AS description  			FROM (SELECT regexp_matches(u, '([^/]+)/([^:]+):([^ ]+) - (.+)') ud -				FROM files f, regexp_split_to_table(pg_read_file(f.filename), '\n') u +				FROM files f, regexp_split_to_table(f.filecontent, '\n') u  				WHERE f.filetypeid = 6) u, categories c, packages p  			WHERE c.name = u.ud[1]  			AND p.name = u.ud[2] diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql index dc6919d..54e1351 100644 --- a/gentoobrowse/datasources/schema.sql +++ b/gentoobrowse/datasources/schema.sql @@ -331,9 +331,19 @@ CREATE TABLE files (      filetypeid integer NOT NULL,      repoid integer NOT NULL,      filesize integer NOT NULL, -    pathparts text[] NOT NULL +    pathparts text[] NOT NULL, +    encoding text  );  ALTER TABLE files OWNER TO gentoo; +-- Name: filecontent(files); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo +CREATE FUNCTION filecontent(f files) RETURNS text +    LANGUAGE plpgsql IMMUTABLE +    AS $$ +begin +	return CONVERT_FROM(PG_READ_BINARY_FILE(f.filename), COALESCE(f.encoding, 'utf-8')); +end +$$; +ALTER FUNCTION gentoobrowse.filecontent(f files) OWNER TO gentoo;  -- Name: filetypes; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:   CREATE TABLE 
filetypes (      filetypeid integer NOT NULL, | 
