summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Goodliffe <dan@randomdan.homeip.net>2015-08-07 01:11:04 +0100
committerDan Goodliffe <dan@randomdan.homeip.net>2015-08-07 01:11:04 +0100
commita48df96ee252dbf78190937e6b765fc6f4e3e62f (patch)
tree3766ace50d5cd8b36aec9894dd2cedcf70a7c963
parentSchema tools (diff)
downloadgentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.tar.bz2
gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.tar.xz
gentoobrowse-a48df96ee252dbf78190937e6b765fc6f4e3e62f.zip
Replace file finding with pure SQL version with multi-repo support
-rw-r--r--gentoobrowse/console/fileimport.xml104
-rw-r--r--gentoobrowse/datasources/schema.sql64
2 files changed, 92 insertions, 76 deletions
diff --git a/gentoobrowse/console/fileimport.xml b/gentoobrowse/console/fileimport.xml
index 236777b..7099839 100644
--- a/gentoobrowse/console/fileimport.xml
+++ b/gentoobrowse/console/fileimport.xml
@@ -1,85 +1,45 @@
<?xml version="1.0"?>
<packageimport name="bugassociate" xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:project2="http://project2.randomdan.homeip.net">
- <project2:sqlrows name="fileTypes" datasource="postgres">
- <sql>
- SELECT filetypeid, searchroot, searchspec
- FROM filetypes
- ORDER BY filetypeid DESC
- </sql>
- </project2:sqlrows>
- <project2:fsrows name="findCacheFiles">
- <root source="local" name="root" />
- <spec source="local" name="spec" />
- </project2:fsrows>
-
- <project2:sqlmerge name="files" datasource="postgres" targettable="files">
- <project2:iterate name="eachFileType" source="fileTypes">
- <project2:iterate name="eachCacheFile" source="findCacheFiles">
- <parameters>
- <root source="parent" name="searchroot" depth="1" />
- <spec source="parent" name="searchspec" depth="1" />
- </parameters>
- <project2:sqlmergeinsert name="insertFile">
- <parameters>
- <filename source="parent" attribute="relPath" depth="1" />
- <moddate source="parent" attribute="modifiedDate" depth="1"/>
- <filetypeid source="parent" name="filetypeid" depth="2" />
- </parameters>
- </project2:sqlmergeinsert>
- </project2:iterate>
- </project2:iterate>
+ <project2:sqlmerge name="repos" datasource="postgres" targettable="repos" useview="yes">
+ <sql>SELECT name FROM PG_LS_DIR('gentoobrowse') name</sql>
<columns>
- <filename key="true" />
- <moddate />
- <filetypeid />
+ <name key="true" />
</columns>
</project2:sqlmerge>
- <project2:sqlrows name="outDatedFiles" datasource="postgres">
+ <project2:sqlmerge name="files" datasource="postgres" targettable="files">
<sql>
- SELECT f.fileid, ft.searchroot || f.filename AS filename, f.cachedat, f.moddate
- FROM files f, filetypes ft
- WHERE (f.cachedat IS NULL OR f.cachedat != f.moddate)
- AND f.filetypeid = ft.filetypeid
- AND ft.content
- ORDER BY f.fileid
+ WITH RECURSIVE filelist AS (
+ SELECT ('gentoobrowse/' || pg_ls_dir) AS filename, st.size, st.modification, st.isdir
+ FROM PG_LS_DIR('gentoobrowse'), PG_STAT_FILE('gentoobrowse') st
+ UNION ALL
+ SELECT parent.filename || '/' || cur, st.size, st.modification, st.isdir
+ FROM filelist parent, PG_LS_DIR(parent.filename) cur, PG_STAT_FILE(parent.filename || '/' || cur) st
+ WHERE parent.isdir
+ AND cur != 'packages'
+ AND cur != 'distfiles'
+ AND cur NOT LIKE '.%'
+ )
+ SELECT
+ r.repoid,
+ fl.filename,
+ ft.filetypeid,
+ (STRING_TO_ARRAY(fl.filename, '/'))[3:100] pathparts,
+ fl.size filesize,
+ fl.modification AT TIME ZONE 'utc' moddate
+ FROM filelist fl, repos r, filetypes ft
+ WHERE (STRING_TO_ARRAY(fl.filename, '/'))[2] = r.name
+ AND (STRING_TO_ARRAY(fl.filename, '/'))[3:100] ~ ft.spec
+ AND NOT fl.isdir
</sql>
- </project2:sqlrows>
- <project2:streamrows name="readFileContent" newline="&#10;" encoding="utf-8" fieldSep="" quoteChar="" keepBlankRows="false" countBlankRows="true">
- <project2:filestream>
- <path source="parent" name="filename" depth="1" />
- </project2:filestream>
<columns>
- <content />
- </columns>
- </project2:streamrows>
-
- <project2:sqlmerge name="loadCacheContent" datasource="postgres" targettable="filecache">
- <updatewhere>a.fileid IN (SELECT o.fileid FROM files o WHERE o.cachedat IS NULL OR o.cachedat != o.moddate)</updatewhere>
- <project2:iterate name="eachOutDatedFile" source="outDatedFiles">
- <project2:iterate name="eachFileContent" source="readFileContent">
- <project2:sqlmergeinsert name="insertFile">
- <parameters>
- <fileid source="parent" name="fileid" depth="2" />
- <line source="parent" attribute="rownum" depth="1" />
- <content source="parent" name="content" depth="1" />
- </parameters>
- </project2:sqlmergeinsert>
- </project2:iterate>
- </project2:iterate>
- <columns>
- <fileid key="true" />
- <line key="true" />
- <content />
+ <filename key="true" />
+ <repoid key="true" />
+ <pathparts />
+ <filesize />
+ <moddate />
+ <filetypeid />
</columns>
</project2:sqlmerge>
-
- <project2:sqltask name="setCachedTime" datasource="postgres">
- <sql>
- UPDATE files SET cachedat = moddate
- WHERE (cachedat != moddate OR cachedat IS NULL)
- AND filetypeid IN (SELECT filetypeid FROM filetypes WHERE content = TRUE)
- </sql>
- </project2:sqltask>
</packageimport>
diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql
index 90d6b93..a93a2c2 100644
--- a/gentoobrowse/datasources/schema.sql
+++ b/gentoobrowse/datasources/schema.sql
@@ -18,6 +18,12 @@ CREATE TYPE ebuildversion AS (
revision bigint
);
ALTER TYPE ebuildversion OWNER TO gentoo;
+-- Name: filterspec; Type: TYPE; Schema: gentoobrowse; Owner: postgres
+CREATE TYPE filterspec AS (
+ part integer,
+ pattern text
+);
+ALTER TYPE filterspec OWNER TO postgres;
SET default_tablespace = '';
SET default_with_oids = false;
-- Name: bugs; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:
@@ -212,6 +218,27 @@ BEGIN
END
$$;
ALTER FUNCTION gentoobrowse.packagefts(p packages) OWNER TO gentoo;
+-- Name: pathpartsmatchesspecs(text[], filterspec[]); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo
+CREATE FUNCTION pathpartsmatchesspecs(pathparts text[], spec filterspec[]) RETURNS boolean
+ LANGUAGE plpgsql
+ AS $$
+DECLARE
+ i integer;
+ p integer;
+BEGIN
+ p := array_upper(pathparts, 1);
+ FOR i IN 1 .. array_upper(spec, 1) LOOP
+ IF (p < spec[i].part) THEN RETURN FALSE; END IF;
+ IF (spec[i].part <= 0) THEN
+ IF (pathparts[p - spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF;
+ ELSE
+ IF (pathparts[spec[i].part] NOT LIKE spec[i].pattern) THEN RETURN FALSE; END IF;
+ END IF;
+ END LOOP;
+ RETURN TRUE;
+END;
+$$;
+ALTER FUNCTION gentoobrowse.pathpartsmatchesspecs(pathparts text[], spec filterspec[]) OWNER TO gentoo;
-- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: gentoo
CREATE OPERATOR ~ (
PROCEDURE = ebuildversionrange,
@@ -219,6 +246,13 @@ CREATE OPERATOR ~ (
RIGHTARG = ebuildversion
);
ALTER OPERATOR gentoobrowse.~ (ebuildversion, ebuildversion) OWNER TO gentoo;
+-- Name: ~; Type: OPERATOR; Schema: gentoobrowse; Owner: postgres
+CREATE OPERATOR ~ (
+ PROCEDURE = pathpartsmatchesspecs,
+ LEFTARG = text[],
+ RIGHTARG = filterspec[]
+);
+ALTER OPERATOR gentoobrowse.~ (text[], filterspec[]) OWNER TO postgres;
-- Name: categories; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:
CREATE TABLE categories (
categoryid integer NOT NULL,
@@ -302,7 +336,10 @@ CREATE TABLE files (
moddate timestamp without time zone NOT NULL,
firstseen timestamp without time zone DEFAULT now() NOT NULL,
cachedat timestamp without time zone,
- filetypeid integer NOT NULL
+ filetypeid integer NOT NULL,
+ repoid integer DEFAULT 1 NOT NULL,
+ filesize integer,
+ pathparts text[]
);
ALTER TABLE files OWNER TO gentoo;
-- Name: file_masks; Type: VIEW; Schema: gentoobrowse; Owner: gentoo
@@ -331,9 +368,7 @@ ALTER TABLE file_masks_withset OWNER TO gentoo;
CREATE TABLE filetypes (
filetypeid integer NOT NULL,
description text NOT NULL,
- searchroot text,
- searchspec text,
- content boolean DEFAULT true NOT NULL
+ spec filterspec[]
);
ALTER TABLE filetypes OWNER TO gentoo;
-- Name: TABLE filetypes; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo
@@ -380,6 +415,22 @@ CREATE TABLE package_urls (
ALTER TABLE package_urls OWNER TO gentoo;
-- Name: TABLE package_urls; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo
COMMENT ON TABLE package_urls IS 'URLs associated with a given package';
+-- Name: repos; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace:
+CREATE TABLE repos (
+ repoid integer NOT NULL,
+ name text NOT NULL
+);
+ALTER TABLE repos OWNER TO gentoo;
+-- Name: repos_repoid_seq; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo
+CREATE SEQUENCE repos_repoid_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MINVALUE
+ NO MAXVALUE
+ CACHE 1;
+ALTER TABLE repos_repoid_seq OWNER TO gentoo;
+-- Name: repos_repoid_seq; Type: SEQUENCE OWNED BY; Schema: gentoobrowse; Owner: gentoo
+ALTER SEQUENCE repos_repoid_seq OWNED BY repos.repoid;
-- Name: seq_categoryid; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo
CREATE SEQUENCE seq_categoryid
START WITH 1
@@ -490,6 +541,8 @@ ALTER TABLE ONLY categories ALTER COLUMN categoryid SET DEFAULT nextval('seq_cat
ALTER TABLE ONLY files ALTER COLUMN fileid SET DEFAULT nextval('seq_fileid'::regclass);
-- Name: packageid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
ALTER TABLE ONLY packages ALTER COLUMN packageid SET DEFAULT nextval('seq_packageid'::regclass);
+-- Name: repoid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
+ALTER TABLE ONLY repos ALTER COLUMN repoid SET DEFAULT nextval('repos_repoid_seq'::regclass);
-- Name: usegroupid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
ALTER TABLE ONLY use_groups ALTER COLUMN usegroupid SET DEFAULT nextval('use_groups_usegroupid_seq'::regclass);
-- Name: userid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
@@ -566,6 +619,9 @@ ALTER TABLE user_packages CLUSTER ON pk_userpackages;
-- Name: pk_users; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace:
ALTER TABLE ONLY users
ADD CONSTRAINT pk_users PRIMARY KEY (userid);
+-- Name: repos_pkey; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace:
+ALTER TABLE ONLY repos
+ ADD CONSTRAINT repos_pkey PRIMARY KEY (repoid);
-- Name: uni_ebuild_pkgver; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace:
ALTER TABLE ONLY ebuilds
ADD CONSTRAINT uni_ebuild_pkgver UNIQUE (packageid, version);