diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-08-07 01:11:04 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-08-07 01:11:04 +0100 |
commit | a48df96ee252dbf78190937e6b765fc6f4e3e62f (patch) | |
tree | 3766ace50d5cd8b36aec9894dd2cedcf70a7c963 | |
parent | Schema tools (diff) | |
download | gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.tar.bz2 gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.tar.xz gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.zip |
Replace file finding with pure SQL version with multi-repo support
-rw-r--r-- | gentoobrowse/console/fileimport.xml | 104 | ||||
-rw-r--r-- | gentoobrowse/datasources/schema.sql | 64 |
2 files changed, 92 insertions, 76 deletions
diff --git a/gentoobrowse/console/fileimport.xml b/gentoobrowse/console/fileimport.xml index 236777b..7099839 100644 --- a/gentoobrowse/console/fileimport.xml +++ b/gentoobrowse/console/fileimport.xml @@ -1,85 +1,45 @@ <?xml version="1.0"?> <packageimport name="bugassociate" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:project2="http://project2.randomdan.homeip.net"> - <project2:sqlrows name="fileTypes" datasource="postgres"> - <sql> - SELECT filetypeid, searchroot, searchspec - FROM filetypes - ORDER BY filetypeid DESC - </sql> - </project2:sqlrows> - <project2:fsrows name="findCacheFiles"> - <root source="local" name="root" /> - <spec source="local" name="spec" /> - </project2:fsrows> - - <project2:sqlmerge name="files" datasource="postgres" targettable="files"> - <project2:iterate name="eachFileType" source="fileTypes"> - <project2:iterate name="eachCacheFile" source="findCacheFiles"> - <parameters> - <root source="parent" name="searchroot" depth="1" /> - <spec source="parent" name="searchspec" depth="1" /> - </parameters> - <project2:sqlmergeinsert name="insertFile"> - <parameters> - <filename source="parent" attribute="relPath" depth="1" /> - <moddate source="parent" attribute="modifiedDate" depth="1"/> - <filetypeid source="parent" name="filetypeid" depth="2" /> - </parameters> - </project2:sqlmergeinsert> - </project2:iterate> - </project2:iterate> + <project2:sqlmerge name="repos" datasource="postgres" targettable="repos" useview="yes"> + <sql>SELECT name FROM PG_LS_DIR('gentoobrowse') name</sql> <columns> - <filename key="true" /> - <moddate /> - <filetypeid /> + <name key="true" /> </columns> </project2:sqlmerge> - <project2:sqlrows name="outDatedFiles" datasource="postgres"> + <project2:sqlmerge name="files" datasource="postgres" targettable="files"> <sql> - SELECT f.fileid, ft.searchroot || f.filename AS filename, f.cachedat, f.moddate - FROM files f, filetypes ft - WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate) - AND f.filetypeid = ft.filetypeid - AND ft.content - ORDER BY f.fileid + WITH RECURSIVE filelist AS ( + SELECT ('gentoobrowse/' || pg_ls_dir) AS filename, st.size, st.modification, st.isdir + FROM PG_LS_DIR('gentoobrowse'), PG_STAT_FILE('gentoobrowse') st + UNION ALL + SELECT parent.filename || '/' || cur, st.size, st.modification, st.isdir + FROM filelist parent, PG_LS_DIR(parent.filename) cur, PG_STAT_FILE(parent.filename || '/' || cur) st + WHERE parent.isdir + AND cur != 'packages' + AND cur != 'distfiles' + AND cur NOT LIKE '.%' + ) + SELECT + r.repoid, + fl.filename, + ft.filetypeid, + (STRING_TO_ARRAY(fl.filename, '/'))[3:100] pathparts, + fl.size filesize, + fl.modification AT TIME ZONE 'utc' moddate + FROM filelist fl, repos r, filetypes ft + WHERE (STRING_TO_ARRAY(fl.filename, '/'))[2] = r.name + AND (STRING_TO_ARRAY(fl.filename, '/'))[3:100] ~ ft.spec + AND NOT fl.isdir </sql> - </project2:sqlrows> - <project2:streamrows name="readFileContent" newline=" " encoding="utf-8" fieldSep="" quoteChar="" keepBlankRows="false" countBlankRows="true"> - <project2:filestream> - <path source="parent" name="filename" depth="1" /> - </project2:filestream> <columns> - <content /> - </columns> - </project2:streamrows> - - <project2:sqlmerge name="loadCacheContent" datasource="postgres" targettable="filecache"> - <updatewhere>a.fileid IN (SELECT o.fileid FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate)</updatewhere> - <project2:iterate name="eachOutDatedFile" source="outDatedFiles"> - <project2:iterate name="eachFileContent" source="readFileContent"> - <project2:sqlmergeinsert name="insertFile"> - <parameters> - <fileid source="parent" name="fileid" depth="2" /> - <line source="parent" attribute="rownum" depth="1" /> - <content source="parent" name="content" depth="1" /> - </parameters> - </project2:sqlmergeinsert> - </project2:iterate> - </project2:iterate> - <columns> - <fileid key="true" /> - <line key="true" /> - <content /> + <filename key="true" /> + <repoid key="true" /> + <pathparts /> + <filesize /> + <moddate /> + <filetypeid /> </columns> </project2:sqlmerge> - - <project2:sqltask name="setCachedTime" datasource="postgres"> - <sql> - UPDATE files SET cachedat = moddate - WHERE (cachedat != moddate OR cachedat IS NULL) - AND filetypeid IN (SELECT filetypeid FROM filetypes WHERE content = TRUE) - </sql> - </project2:sqltask> </packageimport> diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql index 90d6b93..a93a2c2 100644 --- a/gentoobrowse/datasources/schema.sql +++ b/gentoobrowse/datasources/schema.sql @@ -18,6 +18,12 @@ CREATE TYPE ebuildversion AS ( revision bigint ); ALTER TYPE ebuildversion OWNER TO gentoo; +-- Name: filterspec; Type: TYPE; Schema: gentoobrowse; Owner: postgres +CREATE TYPE filterspec AS ( + part integer, + pattern text +); +ALTER TYPE filterspec OWNER TO postgres; SET default_tablespace = ''; SET default_with_oids = false; -- Name: bugs; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: @@ -212,6 +218,27 @@ BEGIN END $$; ALTER FUNCTION gentoobrowse.packagefts(p packages) OWNER TO gentoo; +-- Name: pathpartsmatchesspecs(text[], filterspec[]); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo +CREATE FUNCTION pathpartsmatchesspecs(pathparts text[], spec filterspec[]) RETURNS boolean + LANGUAGE plpgsql + AS $$ +DECLARE + i integer; + p integer; +BEGIN + p := array_upper(pathparts, 1); + FOR i IN 1 .. array_upper(spec, 1) LOOP + IF (p < spec[i].part) THEN RETURN FALSE; END IF; + IF (spec[i].part <= 0) THEN + IF (pathparts[p - spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF; + ELSE + IF (pathparts[spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF; + END IF; + END LOOP; + RETURN TRUE; +END; +$$; +ALTER FUNCTION gentoobrowse.pathpartsmatchesspecs(pathparts text[], spec filterspec[]) OWNER TO gentoo; -- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: gentoo CREATE OPERATOR ~ ( PROCEDURE = ebuildversionrange, @@ -219,6 +246,13 @@ CREATE OPERATOR ~ ( RIGHTARG = ebuildversion ); ALTER OPERATOR gentoobrowse.~ (ebuildversion, ebuildversion) OWNER TO gentoo; +-- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: postgres +CREATE OPERATOR ~ ( + PROCEDURE = pathpartsmatchesspecs, + LEFTARG = text[], + RIGHTARG = filterspec[] +); +ALTER OPERATOR gentoobrowse.~ (text[], filterspec[]) OWNER TO postgres; -- Name: categories; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: CREATE TABLE categories ( categoryid integer NOT NULL, @@ -302,7 +336,10 @@ CREATE TABLE files ( moddate timestamp without time zone NOT NULL, firstseen timestamp without time zone DEFAULT now() NOT NULL, cachedat timestamp without time zone, - filetypeid integer NOT NULL + filetypeid integer NOT NULL, + repoid integer DEFAULT 1 NOT NULL, + filesize integer, + pathparts text[] ); ALTER TABLE files OWNER TO gentoo; -- Name: file_masks; Type: VIEW; Schema: gentoobrowse; Owner: gentoo @@ -331,9 +368,7 @@ ALTER TABLE file_masks_withset OWNER TO gentoo; CREATE TABLE filetypes ( filetypeid integer NOT NULL, description text NOT NULL, - searchroot text, - searchspec text, - content boolean DEFAULT true NOT NULL + spec filterspec[] ); ALTER TABLE filetypes OWNER TO gentoo; -- Name: TABLE filetypes; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo @@ -380,6 +415,22 @@ CREATE TABLE package_urls ( ALTER TABLE package_urls OWNER TO gentoo; -- Name: TABLE package_urls; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo COMMENT ON TABLE package_urls IS 'URLs associated with a given package'; +-- Name: repos; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: +CREATE TABLE repos ( + repoid integer NOT NULL, + name text NOT NULL +); +ALTER TABLE repos OWNER TO gentoo; +-- Name: repos_repoid_seq; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo +CREATE SEQUENCE repos_repoid_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER TABLE repos_repoid_seq OWNER TO gentoo; +-- Name: repos_repoid_seq; Type: SEQUENCE OWNED BY; Schema: gentoobrowse; Owner: gentoo +ALTER SEQUENCE repos_repoid_seq OWNED BY repos.repoid; -- Name: seq_categoryid; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo CREATE SEQUENCE seq_categoryid START WITH 1 @@ -490,6 +541,8 @@ ALTER TABLE ONLY categories ALTER COLUMN categoryid SET DEFAULT nextval('seq_cat ALTER TABLE ONLY files ALTER COLUMN fileid SET DEFAULT nextval('seq_fileid'::regclass); -- Name: packageid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo ALTER TABLE ONLY packages ALTER COLUMN packageid SET DEFAULT nextval('seq_packageid'::regclass); +-- Name: repoid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo +ALTER TABLE ONLY repos ALTER COLUMN repoid SET DEFAULT nextval('repos_repoid_seq'::regclass); -- Name: usegroupid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo ALTER TABLE ONLY use_groups ALTER COLUMN usegroupid SET DEFAULT nextval('use_groups_usegroupid_seq'::regclass); -- Name: userid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo @@ -566,6 +619,9 @@ ALTER TABLE user_packages CLUSTER ON pk_userpackages; -- Name: pk_users; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: ALTER TABLE ONLY users ADD CONSTRAINT pk_users PRIMARY KEY (userid); +-- Name: repos_pkey; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: +ALTER TABLE ONLY repos + ADD CONSTRAINT repos_pkey PRIMARY KEY (repoid); -- Name: uni_ebuild_pkgver; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: ALTER TABLE ONLY ebuilds ADD CONSTRAINT uni_ebuild_pkgver UNIQUE (packageid, version); |