From a48df96ee252dbf78190937e6b765fc6f4e3e62f Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Fri, 7 Aug 2015 01:11:04 +0100 Subject: Replace file finding with pure SQL version with multi-repo support --- gentoobrowse/console/fileimport.xml | 104 +++++++++++------------------------- gentoobrowse/datasources/schema.sql | 64 ++++++++++++++++++++-- 2 files changed, 92 insertions(+), 76 deletions(-) diff --git a/gentoobrowse/console/fileimport.xml b/gentoobrowse/console/fileimport.xml index 236777b..7099839 100644 --- a/gentoobrowse/console/fileimport.xml +++ b/gentoobrowse/console/fileimport.xml @@ -1,85 +1,45 @@ - - - SELECT filetypeid, searchroot, searchspec - FROM filetypes - ORDER BY filetypeid DESC - - - - - - - - - - - - - - - - - - - - - - - + + SELECT name FROM PG_LS_DIR('gentoobrowse') name - - - + - + - SELECT f.fileid, ft.searchroot || f.filename AS filename, f.cachedat, f.moddate - FROM files f, filetypes ft - WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate) - AND f.filetypeid = ft.filetypeid - AND ft.content - ORDER BY f.fileid + WITH RECURSIVE filelist AS ( + SELECT ('gentoobrowse/' || pg_ls_dir) AS filename, st.size, st.modification, st.isdir + FROM PG_LS_DIR('gentoobrowse'), PG_STAT_FILE('gentoobrowse') st + UNION ALL + SELECT parent.filename || '/' || cur, st.size, st.modification, st.isdir + FROM filelist parent, PG_LS_DIR(parent.filename) cur, PG_STAT_FILE(parent.filename || '/' || cur) st + WHERE parent.isdir + AND cur != 'packages' + AND cur != 'distfiles' + AND cur NOT LIKE '.%' + ) + SELECT + r.repoid, + fl.filename, + ft.filetypeid, + (STRING_TO_ARRAY(fl.filename, '/'))[3:100] pathparts, + fl.size filesize, + fl.modification AT TIME ZONE 'utc' moddate + FROM filelist fl, repos r, filetypes ft + WHERE (STRING_TO_ARRAY(fl.filename, '/'))[2] = r.name + AND (STRING_TO_ARRAY(fl.filename, '/'))[3:100] ~ ft.spec + AND NOT fl.isdir - - - - - - - - - - - a.fileid IN (SELECT o.fileid FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate) - - - - - - - - - - - - - - - + + + + + + - - - - UPDATE files SET cachedat = moddate - WHERE (cachedat != moddate OR cachedat IS NULL) - AND filetypeid IN (SELECT filetypeid FROM filetypes WHERE content = TRUE) - - diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql index 90d6b93..a93a2c2 100644 --- a/gentoobrowse/datasources/schema.sql +++ b/gentoobrowse/datasources/schema.sql @@ -18,6 +18,12 @@ CREATE TYPE ebuildversion AS ( revision bigint ); ALTER TYPE ebuildversion OWNER TO gentoo; +-- Name: filterspec; Type: TYPE; Schema: gentoobrowse; Owner: postgres +CREATE TYPE filterspec AS ( + part integer, + pattern text +); +ALTER TYPE filterspec OWNER TO postgres; SET default_tablespace = ''; SET default_with_oids = false; -- Name: bugs; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: @@ -212,6 +218,27 @@ BEGIN END $$; ALTER FUNCTION gentoobrowse.packagefts(p packages) OWNER TO gentoo; +-- Name: pathpartsmatchesspecs(text[], filterspec[]); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo +CREATE FUNCTION pathpartsmatchesspecs(pathparts text[], spec filterspec[]) RETURNS boolean + LANGUAGE plpgsql + AS $$ +DECLARE + i integer; + p integer; +BEGIN + p := array_upper(pathparts, 1); + FOR i IN 1 .. array_upper(spec, 1) LOOP + IF (p < spec[i].part) THEN RETURN FALSE; END IF; + IF (spec[i].part <= 0) THEN + IF (pathparts[p - spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF; + ELSE + IF (pathparts[spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF; + END IF; + END LOOP; + RETURN TRUE; +END; +$$; +ALTER FUNCTION gentoobrowse.pathpartsmatchesspecs(pathparts text[], spec filterspec[]) OWNER TO gentoo; -- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: gentoo CREATE OPERATOR ~ ( PROCEDURE = ebuildversionrange, @@ -219,6 +246,13 @@ CREATE OPERATOR ~ ( RIGHTARG = ebuildversion ); ALTER OPERATOR gentoobrowse.~ (ebuildversion, ebuildversion) OWNER TO gentoo; +-- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: postgres +CREATE OPERATOR ~ ( + PROCEDURE = pathpartsmatchesspecs, + LEFTARG = text[], + RIGHTARG = filterspec[] +); +ALTER OPERATOR gentoobrowse.~ (text[], filterspec[]) OWNER TO postgres; -- Name: categories; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: CREATE TABLE categories ( categoryid integer NOT NULL, @@ -302,7 +336,10 @@ CREATE TABLE files ( moddate timestamp without time zone NOT NULL, firstseen timestamp without time zone DEFAULT now() NOT NULL, cachedat timestamp without time zone, - filetypeid integer NOT NULL + filetypeid integer NOT NULL, + repoid integer DEFAULT 1 NOT NULL, + filesize integer, + pathparts text[] ); ALTER TABLE files OWNER TO gentoo; -- Name: file_masks; Type: VIEW; Schema: gentoobrowse; Owner: gentoo @@ -331,9 +368,7 @@ ALTER TABLE file_masks_withset OWNER TO gentoo; CREATE TABLE filetypes ( filetypeid integer NOT NULL, description text NOT NULL, - searchroot text, - searchspec text, - content boolean DEFAULT true NOT NULL + spec filterspec[] ); ALTER TABLE filetypes OWNER TO gentoo; -- Name: TABLE filetypes; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo @@ -380,6 +415,22 @@ CREATE TABLE package_urls ( ALTER TABLE package_urls OWNER TO gentoo; -- Name: TABLE package_urls; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo COMMENT ON TABLE package_urls IS 'URLs associated with a given package'; +-- Name: repos; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: +CREATE TABLE repos ( + repoid integer NOT NULL, + name text NOT NULL +); +ALTER TABLE repos OWNER TO gentoo; +-- Name: repos_repoid_seq; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo +CREATE SEQUENCE repos_repoid_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER TABLE repos_repoid_seq OWNER TO gentoo; +-- Name: repos_repoid_seq; Type: SEQUENCE OWNED BY; Schema: gentoobrowse; Owner: gentoo +ALTER SEQUENCE repos_repoid_seq OWNED BY repos.repoid; -- Name: seq_categoryid; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo CREATE SEQUENCE seq_categoryid START WITH 1 @@ -490,6 +541,8 @@ ALTER TABLE ONLY categories ALTER COLUMN categoryid SET DEFAULT nextval('seq_cat ALTER TABLE ONLY files ALTER COLUMN fileid SET DEFAULT nextval('seq_fileid'::regclass); -- Name: packageid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo ALTER TABLE ONLY packages ALTER COLUMN packageid SET DEFAULT nextval('seq_packageid'::regclass); +-- Name: repoid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo +ALTER TABLE ONLY repos ALTER COLUMN repoid SET DEFAULT nextval('repos_repoid_seq'::regclass); -- Name: usegroupid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo ALTER TABLE ONLY use_groups ALTER COLUMN usegroupid SET DEFAULT nextval('use_groups_usegroupid_seq'::regclass); -- Name: userid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo @@ -566,6 +619,9 @@ ALTER TABLE user_packages CLUSTER ON pk_userpackages; -- Name: pk_users; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: ALTER TABLE ONLY users ADD CONSTRAINT pk_users PRIMARY KEY (userid); +-- Name: repos_pkey; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: +ALTER TABLE ONLY repos + ADD CONSTRAINT repos_pkey PRIMARY KEY (repoid); -- Name: uni_ebuild_pkgver; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: ALTER TABLE ONLY ebuilds ADD CONSTRAINT uni_ebuild_pkgver UNIQUE (packageid, version); -- cgit v1.2.3