diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-08-14 00:57:16 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-08-14 00:57:16 +0100 |
commit | 4a76f1420a948ce486af58c9b91bbb4ae14ef45b (patch) | |
tree | 9cd601fa7f376256433d3d61d6859ed635682c2a | |
parent | Additions to show repo name (diff) | |
download | gentoobrowse-4a76f1420a948ce486af58c9b91bbb4ae14ef45b.tar.bz2 gentoobrowse-4a76f1420a948ce486af58c9b91bbb4ae14ef45b.tar.xz gentoobrowse-4a76f1420a948ce486af58c9b91bbb4ae14ef45b.zip |
Script and table to import news
-rw-r--r-- | gentoobrowse/console/news.xml | 46 | ||||
-rw-r--r-- | gentoobrowse/datasources/schema.sql | 17 |
2 files changed, 63 insertions, 0 deletions
diff --git a/gentoobrowse/console/news.xml b/gentoobrowse/console/news.xml new file mode 100644 index 0000000..109524c --- /dev/null +++ b/gentoobrowse/console/news.xml @@ -0,0 +1,46 @@ +<?xml version="1.0"?> +<packageimport name="import news" xmlns:xi="http://www.w3.org/2001/XInclude" + xmlns:project2="http://project2.randomdan.homeip.net"> + <project2:sqlmerge name="merge news" datasource="postgres" targettable="news"> + <updatewhere> + a.newsid IN ( + SELECT f.pathparts[3] + FROM files f + WHERE f.filetypeid = 11 + AND (f.cachedat IS NULL OR f.cachedat != f.moddate)) + </updatewhere> + <sql> + SELECT + newsid, + (SELECT REGEXP_MATCHES(p[1], 'Title: ([^\n]+)'))[1] title, + CAST((SELECT REGEXP_MATCHES(p[1], 'Posted: ([^\n]+)'))[1] AS DATE) posted, + (SELECT REGEXP_MATCHES(P[1], 'Author: ([^\n]+) <'))[1] authorname, + (SELECT REGEXP_MATCHES(P[1], 'Author:.*?<([^>]+)>'))[1] authoremail, + (SELECT ARRAY_AGG(pkg) FROM (SELECT (REGEXP_MATCHES(P[1], 'Display-If-Installed: ([^\n]+)', 'g') )[1] pkg) p) atomspec, + (SELECT ARRAY_AGG(REGEXP_REPLACE(PARA, '\s+', ' ', 'g')) FROM (SELECT REGEXP_SPLIT_TO_TABLE(p[2], '\n\n') para) p) body, + (SELECT ARRAY_AGG(url) FROM (SELECT (REGEXP_MATCHES(p[3], '(\w+://[^\s]+)', 'g'))[1] url) u) urls + FROM ( + SELECT pathparts[3] newsid, REGEXP_MATCHES(PG_READ_FILE(filename), '^(.*?)\n\n(.*?)(\n(\n\[[\d]+\] \w+://[^ ]+)*)?$') p + FROM files f + WHERE f.filetypeid = 11 + AND (f.cachedat IS NULL OR f.cachedat != f.moddate) + ) newsfiles + </sql> + <columns> + <newsid key="true" /> + <posted /> + <authorname /> + <authoremail /> + <atomspec /> + <body /> + <urls /> + </columns> + </project2:sqlmerge> + <project2:sqltask name="update" datasource="postgres"> + <sql> + UPDATE files SET + cachedat = moddate + WHERE filetypeid = 11 + </sql> + </project2:sqltask> +</packageimport> diff --git a/gentoobrowse/datasources/schema.sql b/gentoobrowse/datasources/schema.sql index 161af0d..fbc86ec 100644 --- a/gentoobrowse/datasources/schema.sql +++ b/gentoobrowse/datasources/schema.sql @@ -369,6 +369,18 @@ CREATE SEQUENCE masksets_setno_seq ALTER TABLE masksets_setno_seq OWNER TO gentoo; -- Name: masksets_setno_seq; Type: SEQUENCE OWNED BY; Schema: gentoobrowse; Owner: gentoo ALTER SEQUENCE masksets_setno_seq OWNED BY masksets.setno; +-- Name: news; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: +CREATE TABLE news ( + newsid text NOT NULL, + title text NOT NULL, + posted date NOT NULL, + authorname text NOT NULL, + authoremail text NOT NULL, + atomspec text[], + body text[] NOT NULL, + urls text[] +); +ALTER TABLE news OWNER TO gentoo; -- Name: package_bugs; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: CREATE TABLE package_bugs ( packageid integer NOT NULL, @@ -565,6 +577,9 @@ ALTER TABLE ONLY filetypes -- Name: pk_files; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: ALTER TABLE ONLY files ADD CONSTRAINT pk_files PRIMARY KEY (fileid); +-- Name: pk_news; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: +ALTER TABLE ONLY news + ADD CONSTRAINT pk_news PRIMARY KEY (newsid); -- Name: pk_package_changelogs; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: ALTER TABLE ONLY package_changelogs ADD CONSTRAINT pk_package_changelogs PRIMARY KEY (packageid, date, n, repoid); @@ -646,6 +661,8 @@ CREATE INDEX idx_ebuildrdeps_package ON ebuild_rdeps USING btree (packageid); CREATE INDEX idx_ebuilds_firstseen ON ebuilds USING btree (firstseen); -- Name: idx_ebuilduses_use; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: CREATE INDEX idx_ebuilduses_use ON ebuild_uses USING btree (use); +-- Name: idx_news_posted; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: +CREATE INDEX idx_news_posted ON news USING btree (posted); -- Name: idx_package_name; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: CREATE INDEX idx_package_name ON packages USING btree (name); -- Name: idx_packagebugs_bug; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: |