diff options
| author | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-08-07 01:11:04 +0100 | 
|---|---|---|
| committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-08-07 01:11:04 +0100 | 
| commit | a48df96ee252dbf78190937e6b765fc6f4e3e62f (patch) | |
| tree | 3766ace50d5cd8b36aec9894dd2cedcf70a7c963 | |
| parent | Schema tools (diff) | |
| download | gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.tar.bz2 gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.tar.xz gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.zip  | |
Replace file finding with pure SQL version with multi-repo support
| -rw-r--r-- | gentoobrowse/console/fileimport.xml | 104 | ||||
| -rw-r--r-- | gentoobrowse/datasources/schema.sql | 64 | 
2 files changed, 92 insertions, 76 deletions
diff --git a/gentoobrowse/console/fileimport.xml b/gentoobrowse/console/fileimport.xml index 236777b..7099839 100644 --- a/gentoobrowse/console/fileimport.xml +++ b/gentoobrowse/console/fileimport.xml @@ -1,85 +1,45 @@  <?xml version="1.0"?>  <packageimport name="bugassociate" xmlns:xi="http://www.w3.org/2001/XInclude"  		xmlns:project2="http://project2.randomdan.homeip.net"> -	<project2:sqlrows name="fileTypes" datasource="postgres"> -		<sql> -			SELECT filetypeid, searchroot, searchspec -			FROM filetypes -			ORDER BY filetypeid DESC -		</sql> -	</project2:sqlrows> -	<project2:fsrows name="findCacheFiles"> -		<root source="local" name="root" /> -		<spec source="local" name="spec" /> -	</project2:fsrows> - -	<project2:sqlmerge name="files" datasource="postgres" targettable="files"> -		<project2:iterate name="eachFileType" source="fileTypes"> -			<project2:iterate name="eachCacheFile" source="findCacheFiles"> -				<parameters> -					<root source="parent" name="searchroot" depth="1" /> -					<spec source="parent" name="searchspec" depth="1" /> -				</parameters> -				<project2:sqlmergeinsert name="insertFile"> -					<parameters> -						<filename source="parent" attribute="relPath" depth="1" /> -						<moddate source="parent" attribute="modifiedDate" depth="1"/> -						<filetypeid source="parent" name="filetypeid" depth="2" /> -					</parameters> -				</project2:sqlmergeinsert> -			</project2:iterate> -		</project2:iterate> +	<project2:sqlmerge name="repos" datasource="postgres" targettable="repos" useview="yes"> +		<sql>SELECT name FROM PG_LS_DIR('gentoobrowse') name</sql>  		<columns> -			<filename key="true" /> -			<moddate /> -			<filetypeid /> +			<name key="true" />  		</columns>  	</project2:sqlmerge> -	<project2:sqlrows name="outDatedFiles" datasource="postgres"> +	<project2:sqlmerge name="files" datasource="postgres" targettable="files">  		<sql> -			SELECT f.fileid, ft.searchroot || f.filename AS filename, f.cachedat, f.moddate -			FROM files f, filetypes ft -			WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate) -			AND f.filetypeid = ft.filetypeid -			AND ft.content -			ORDER BY f.fileid +			WITH RECURSIVE filelist AS ( +				SELECT ('gentoobrowse/' || pg_ls_dir) AS filename, st.size, st.modification, st.isdir +				FROM PG_LS_DIR('gentoobrowse'), PG_STAT_FILE('gentoobrowse') st +				UNION ALL +				SELECT parent.filename || '/' || cur, st.size, st.modification, st.isdir +				FROM filelist parent, PG_LS_DIR(parent.filename) cur, PG_STAT_FILE(parent.filename || '/' || cur) st +				WHERE parent.isdir +				AND cur != 'packages' +				AND cur != 'distfiles' +				AND cur NOT LIKE '.%' +			) +			SELECT +				r.repoid, +				fl.filename, +				ft.filetypeid, +				(STRING_TO_ARRAY(fl.filename, '/'))[3:100] pathparts, +				fl.size filesize, +				fl.modification AT TIME ZONE 'utc' moddate +			FROM filelist fl, repos r, filetypes ft +			WHERE (STRING_TO_ARRAY(fl.filename, '/'))[2] = r.name +			AND (STRING_TO_ARRAY(fl.filename, '/'))[3:100] ~ ft.spec +			AND NOT fl.isdir  		</sql> -	</project2:sqlrows> -	<project2:streamrows name="readFileContent" newline="
" encoding="utf-8" fieldSep="" quoteChar="" keepBlankRows="false" countBlankRows="true"> -		<project2:filestream> -			<path source="parent" name="filename" depth="1" /> -		</project2:filestream>  		<columns> -			<content /> -		</columns> -	</project2:streamrows> - -	<project2:sqlmerge name="loadCacheContent" datasource="postgres" targettable="filecache"> -		<updatewhere>a.fileid IN (SELECT o.fileid FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate)</updatewhere> -		<project2:iterate name="eachOutDatedFile" source="outDatedFiles"> -			<project2:iterate name="eachFileContent" source="readFileContent"> -				<project2:sqlmergeinsert name="insertFile"> -					<parameters> -						<fileid source="parent" name="fileid" depth="2" /> -						<line source="parent" attribute="rownum" depth="1" /> -						<content source="parent" name="content" depth="1" /> -					</parameters> -				</project2:sqlmergeinsert> -			</project2:iterate> -		</project2:iterate> -		<columns> -			<fileid key="true" /> -			<line key="true" /> -			<content /> +			<filename key="true" /> +			<repoid key="true" /> +			<pathparts /> +			<filesize /> +			<moddate /> +			<filetypeid />  		</columns>  	</project2:sqlmerge> - -	<project2:sqltask name="setCachedTime" datasource="postgres"> -		<sql> -			UPDATE files SET cachedat = moddate -			WHERE (cachedat != moddate OR cachedat IS NULL) -			AND filetypeid IN (SELECT filetypeid FROM filetypes WHERE content = TRUE) -		</sql> -	</project2:sqltask>  </packageimport> diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql index 90d6b93..a93a2c2 100644 --- a/gentoobrowse/datasources/schema.sql +++ b/gentoobrowse/datasources/schema.sql @@ -18,6 +18,12 @@ CREATE TYPE ebuildversion AS (  	revision bigint  );  ALTER TYPE ebuildversion OWNER TO gentoo; +-- Name: filterspec; Type: TYPE; Schema: gentoobrowse; Owner: postgres +CREATE TYPE filterspec AS ( +	part integer, +	pattern text +); +ALTER TYPE filterspec OWNER TO postgres;  SET default_tablespace = '';  SET default_with_oids = false;  -- Name: bugs; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:  @@ -212,6 +218,27 @@ BEGIN  END  $$;  ALTER FUNCTION gentoobrowse.packagefts(p packages) OWNER TO gentoo; +-- Name: pathpartsmatchesspecs(text[], filterspec[]); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo +CREATE FUNCTION pathpartsmatchesspecs(pathparts text[], spec filterspec[]) RETURNS boolean +    LANGUAGE plpgsql +    AS $$ +DECLARE +	i integer; +	p integer; +BEGIN +	p := array_upper(pathparts, 1); +	FOR i IN 1 .. array_upper(spec, 1) LOOP +		IF (p < spec[i].part) THEN RETURN FALSE; END IF; +		IF (spec[i].part <= 0) THEN +			IF (pathparts[p - spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF; +		ELSE +			IF (pathparts[spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF; +		END IF; +	END LOOP; +	RETURN TRUE; +END; +$$; +ALTER FUNCTION gentoobrowse.pathpartsmatchesspecs(pathparts text[], spec filterspec[]) OWNER TO gentoo;  -- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: gentoo  CREATE OPERATOR ~ (      PROCEDURE = ebuildversionrange, @@ -219,6 +246,13 @@ CREATE OPERATOR ~ (      RIGHTARG = ebuildversion  );  ALTER OPERATOR gentoobrowse.~ (ebuildversion, ebuildversion) OWNER TO gentoo; +-- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: postgres +CREATE OPERATOR ~ ( +    PROCEDURE = pathpartsmatchesspecs, +    LEFTARG = text[], +    RIGHTARG = filterspec[] +); +ALTER OPERATOR gentoobrowse.~ (text[], filterspec[]) OWNER TO postgres;  -- Name: categories; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:   CREATE TABLE categories (      categoryid integer NOT NULL, @@ -302,7 +336,10 @@ CREATE TABLE files (      moddate timestamp without time zone NOT NULL,      firstseen timestamp without time zone DEFAULT now() NOT NULL,      cachedat timestamp without time zone, -    filetypeid integer NOT NULL +    filetypeid integer NOT NULL, +    repoid integer DEFAULT 1 NOT NULL, +    filesize integer, +    pathparts text[]  );  ALTER TABLE files OWNER TO gentoo;  -- Name: file_masks; Type: VIEW; Schema: gentoobrowse; Owner: gentoo @@ -331,9 +368,7 @@ ALTER TABLE file_masks_withset OWNER TO gentoo;  CREATE TABLE filetypes (      filetypeid integer NOT NULL,      description text NOT NULL, -    searchroot text, -    searchspec text, -    content boolean DEFAULT true NOT NULL +    spec filterspec[]  );  ALTER TABLE filetypes OWNER TO gentoo;  -- Name: TABLE filetypes; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo @@ -380,6 +415,22 @@ CREATE TABLE package_urls (  ALTER TABLE package_urls OWNER TO gentoo;  -- Name: TABLE package_urls; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo  COMMENT ON TABLE package_urls IS 'URLs associated with a given package'; +-- Name: repos; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:  +CREATE TABLE repos ( +    repoid integer NOT NULL, +    name text NOT NULL +); +ALTER TABLE repos OWNER TO gentoo; +-- Name: repos_repoid_seq; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo +CREATE SEQUENCE repos_repoid_seq +    START WITH 1 +    INCREMENT BY 1 +    NO MINVALUE +    NO MAXVALUE +    CACHE 1; +ALTER TABLE repos_repoid_seq OWNER TO gentoo; +-- Name: repos_repoid_seq; Type: SEQUENCE OWNED BY; Schema: gentoobrowse; Owner: gentoo +ALTER SEQUENCE repos_repoid_seq OWNED BY repos.repoid;  -- Name: seq_categoryid; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo  CREATE SEQUENCE seq_categoryid      START WITH 1 @@ -490,6 +541,8 @@ ALTER TABLE ONLY categories ALTER COLUMN categoryid SET DEFAULT nextval('seq_cat  ALTER TABLE ONLY files ALTER COLUMN fileid SET DEFAULT nextval('seq_fileid'::regclass);  -- Name: packageid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo  ALTER TABLE ONLY packages ALTER COLUMN packageid SET DEFAULT nextval('seq_packageid'::regclass); +-- Name: repoid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo +ALTER TABLE ONLY repos ALTER COLUMN repoid SET DEFAULT nextval('repos_repoid_seq'::regclass);  -- Name: usegroupid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo  ALTER TABLE ONLY use_groups ALTER COLUMN usegroupid SET DEFAULT nextval('use_groups_usegroupid_seq'::regclass);  -- Name: userid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo @@ -566,6 +619,9 @@ ALTER TABLE user_packages CLUSTER ON pk_userpackages;  -- Name: pk_users; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace:   ALTER TABLE ONLY users      ADD CONSTRAINT pk_users PRIMARY KEY (userid); +-- Name: repos_pkey; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace:  +ALTER TABLE ONLY repos +    ADD CONSTRAINT repos_pkey PRIMARY KEY (repoid);  -- Name: uni_ebuild_pkgver; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace:   ALTER TABLE ONLY ebuilds      ADD CONSTRAINT uni_ebuild_pkgver UNIQUE (packageid, version);  | 
