From 18c34cb67642243158836effc1dac3f678fa932c Mon Sep 17 00:00:00 2001
From: Dan Goodliffe
Date: Mon, 6 Jun 2016 02:30:45 +0100
Subject: WIP import git change logs
---
 gentoobrowse-api/api/maintenance.ice | 6 +
 gentoobrowse-api/db/schema.sql | 50 +++++++++
 gentoobrowse-api/service/Jamfile.jam | 2 +
 gentoobrowse-api/service/converters.h | 21 ++++
 gentoobrowse-api/service/maintenanceChangeLogs.cpp | 123 +++++++++++++++++++++
 gentoobrowse-api/service/maintenanceimpl.h | 1 +
 .../service/sql/maintenance/changeLogInsert.sql | 2 +
 .../service/sql/maintenance/changeLogRoots.sql | 8 ++
 gentoobrowse-api/service/utils/dbUtils.cpp | 11 ++
 gentoobrowse-api/service/utils/dbUtils.h | 1 +
 gentoobrowse-api/unittests/Jamfile.jam | 8 ++
 gentoobrowse-api/unittests/mockDefs.cpp | 5 +-
 gentoobrowse-api/unittests/testChangeLog.cpp | 32 ++++++
 gentoobrowse-api/util/update.cpp | 4 +-
 14 files changed, 272 insertions(+), 2 deletions(-)
 create mode 100644 gentoobrowse-api/service/converters.h
 create mode 100644 gentoobrowse-api/service/maintenanceChangeLogs.cpp
 create mode 100644 gentoobrowse-api/service/sql/maintenance/changeLogInsert.sql
 create mode 100644 gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql
 create mode 100644 gentoobrowse-api/unittests/testChangeLog.cpp
diff --git a/gentoobrowse-api/api/maintenance.ice b/gentoobrowse-api/api/maintenance.ice
index 0e80fed..a66b1d0 100644
--- a/gentoobrowse-api/api/maintenance.ice
+++ b/gentoobrowse-api/api/maintenance.ice
@@ -1,7 +1,13 @@
 module Gentoo {
+	exception GitError { // Raised by refreshChangeLogs when a git operation fails
+		int errorCode; // numeric code identifying the failure
+		int errorClass; // "klass" field of libgit2's last error record
+		string message; // message text of libgit2's last error record
+	};
 	interface Maintenance {
 		idempotent void refreshPackageTree();
 		idempotent void refreshBugs();
+		idempotent void refreshChangeLogs() throws GitError; // imports git commits into the changelog table
 	};
 };
diff --git a/gentoobrowse-api/db/schema.sql b/gentoobrowse-api/db/schema.sql
index ec56da2..1a2f571 100644
--- a/gentoobrowse-api/db/schema.sql
+++ b/gentoobrowse-api/db/schema.sql
@@ -26,6 +26,21 @@ CREATE TYPE filterspec AS (
 ALTER TYPE filterspec 
OWNER TO gentoo;
 SET default_tablespace = '';
 SET default_with_oids = false;
+-- Name: changelog; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
+CREATE TABLE changelog (
+    changeid integer NOT NULL,
+    repoid integer NOT NULL,
+    commitid text NOT NULL,
+    committime timestamp without time zone NOT NULL,
+    authorname text NOT NULL,
+    authoremail text NOT NULL,
+    summary text NOT NULL,
+    body text,
+    files text[] NOT NULL
+);
+ALTER TABLE changelog OWNER TO gentoo;
+-- Name: TABLE changelog; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo
+COMMENT ON TABLE changelog IS 'Package repository change logs';
 -- Name: bugs; Type: TABLE; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
 CREATE TABLE bugs (
     bugid integer NOT NULL,
@@ -37,6 +52,16 @@ CREATE TABLE bugs (
 ALTER TABLE bugs OWNER TO gentoo;
 -- Name: TABLE bugs; Type: COMMENT; Schema: gentoobrowse; Owner: gentoo
 COMMENT ON TABLE bugs IS 'Import of Gentoo Bugs bug list from snapshots';
+-- Name: changelogfts(changelog); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo
+CREATE FUNCTION changelogfts(cl changelog) RETURNS tsvector
+    LANGUAGE plpgsql IMMUTABLE
+    AS $$
+BEGIN
+    RETURN (SETWEIGHT(TO_TSVECTOR('english', cl.summary), 'A') ||
+        SETWEIGHT(TO_TSVECTOR('english', COALESCE(cl.body, '')), 'B'));
+END
+$$;
+ALTER FUNCTION gentoobrowse.changelogfts(cl changelog) OWNER TO gentoo;
 -- Name: bugfts(bugs); Type: FUNCTION; Schema: gentoobrowse; Owner: gentoo
 CREATE FUNCTION bugfts(b bugs) RETURNS tsvector
     LANGUAGE plpgsql IMMUTABLE
@@ -385,6 +410,16 @@ CREATE TABLE masksets (
     n smallint NOT NULL
 );
 ALTER TABLE masksets OWNER TO gentoo;
+-- Name: changeid_seq; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo
+CREATE SEQUENCE changeid_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+ALTER TABLE changeid_seq OWNER TO gentoo;
+-- Name: changeid_seq; Type: SEQUENCE OWNED BY; Schema: gentoobrowse; Owner: gentoo
+ALTER SEQUENCE changeid_seq OWNED BY changelog.changeid;
 -- Name: masksets_setno_seq; Type: SEQUENCE; Schema: gentoobrowse; Owner: gentoo
 CREATE SEQUENCE masksets_setno_seq
     START WITH 1
@@ -579,6 +614,8 @@ ALTER SEQUENCE users_userid_seq OWNED BY users.userid;
 ALTER TABLE ONLY categories ALTER COLUMN categoryid SET DEFAULT nextval('seq_categoryid'::regclass);
 -- Name: fileid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
 ALTER TABLE ONLY files ALTER COLUMN fileid SET DEFAULT nextval('seq_fileid'::regclass);
+-- Name: changeid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
+ALTER TABLE ONLY changelog ALTER COLUMN changeid SET DEFAULT nextval('changeid_seq'::regclass);
 -- Name: setno; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
 ALTER TABLE ONLY masksets ALTER COLUMN setno SET DEFAULT nextval('masksets_setno_seq'::regclass);
 -- Name: packageid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
@@ -591,6 +628,9 @@ ALTER TABLE ONLY repos ALTER COLUMN repoid SET DEFAULT nextval('repos_repoid_seq
 ALTER TABLE ONLY use_groups ALTER COLUMN usegroupid SET DEFAULT nextval('use_groups_usegroupid_seq'::regclass);
 -- Name: userid; Type: DEFAULT; Schema: gentoobrowse; Owner: gentoo
 ALTER TABLE ONLY users ALTER COLUMN userid SET DEFAULT nextval('users_userid_seq'::regclass);
+-- Name: changelog_pk; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
+ALTER TABLE ONLY changelog
+    ADD CONSTRAINT changelog_pk PRIMARY KEY (changeid);
 -- Name: license_pkey; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
 ALTER TABLE ONLY license
     ADD CONSTRAINT license_pkey PRIMARY KEY (name);
@@ -665,6 +705,9 @@ ALTER TABLE ONLY users
 -- Name: repos_pkey; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
 ALTER TABLE ONLY repos
     ADD CONSTRAINT repos_pkey PRIMARY KEY (repoid);
+-- Name: uni_changelog_commitid; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
+ALTER TABLE ONLY changelog
+    ADD CONSTRAINT uni_changelog_commitid UNIQUE (commitid);
 -- Name: uni_ebuild_pkgverrepo; Type: CONSTRAINT; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
 ALTER TABLE ONLY ebuilds
     ADD CONSTRAINT uni_ebuild_pkgverrepo UNIQUE (packageid, version, repoid);
@@ -695,6 +738,10 @@ ALTER TABLE ONLY use_group
 ALTER TABLE ONLY use_local
     ADD CONSTRAINT use_local_pkey PRIMARY KEY (packageid, use);
 ALTER TABLE use_local CLUSTER ON use_local_pkey;
+-- Name: idx_changelog_committime; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
+CREATE INDEX idx_changelog_committime ON changelog USING btree (committime);
+-- Name: idx_changelog_fts; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
+CREATE INDEX idx_changelog_fts ON changelog USING gin (changelogfts(changelog.*));
 -- Name: idx_bug_fts; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
 CREATE INDEX idx_bug_fts ON bugs USING gin (bugfts(bugs.*));
 -- Name: idx_bugs_firstseen; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
@@ -727,6 +774,9 @@ CREATE INDEX idx_uselocal_use ON use_local USING btree (use);
 CREATE INDEX idx_userpackages_package ON user_packages USING btree (packageid);
 -- Name: uni_masksets_dateaddedn; Type: INDEX; Schema: gentoobrowse; Owner: gentoo; Tablespace: 
 CREATE INDEX uni_masksets_dateaddedn ON masksets USING btree (dateadded, n);
+-- Name: fk_changelog_repo; Type: FK CONSTRAINT; Schema: gentoobrowse; Owner: gentoo
+ALTER TABLE ONLY changelog
+    ADD CONSTRAINT fk_changelog_repo FOREIGN KEY (repoid) REFERENCES repos(repoid) ON UPDATE CASCADE ON DELETE CASCADE;
 -- Name: fk_ebuild_pkg; Type: FK CONSTRAINT; Schema: gentoobrowse; Owner: gentoo
 ALTER TABLE ONLY ebuilds
     ADD CONSTRAINT fk_ebuild_pkg FOREIGN KEY (packageid) REFERENCES packages(packageid) ON UPDATE CASCADE ON DELETE CASCADE;
diff --git a/gentoobrowse-api/service/Jamfile.jam b/gentoobrowse-api/service/Jamfile.jam
index 2f4279a..4cb3ce6 100644
--- a/gentoobrowse-api/service/Jamfile.jam
+++ b/gentoobrowse-api/service/Jamfile.jam
@@ -2,6 +2,7 @@ import lex ;
 import icetray ;
 lib icetray : : : : 
/usr/include/icetray ;
+lib git2 ;
 lib gentoobrowse-service :
 	[ glob-tree *.cpp : bin ]
@@ -12,6 +13,7 @@ lib gentoobrowse-service :
 	pure
 	..//adhocutil
 	icetray
+	git2
 	..//dbppcore
 	..//IceBox
 	..//slicer
diff --git a/gentoobrowse-api/service/converters.h b/gentoobrowse-api/service/converters.h
new file mode 100644
index 0000000..89a7881
--- /dev/null
+++ b/gentoobrowse-api/service/converters.h
@@ -0,0 +1,21 @@
+#ifndef GENTOOBROWSE_API_SERVICE_CONVERTERS_H
+#define GENTOOBROWSE_API_SERVICE_CONVERTERS_H
+
+#include
+
+namespace Slicer {
+	::Gentoo::StringList
+	unpackPqTextArray(const std::string & s);
+
+	std::string
+	packPqTextArray(const ::Gentoo::StringList & l);
+
+	boost::posix_time::ptime
+	shortDateToPosixTime(const std::string & s);
+
+	std::string
+	posixTimeToShortDate(const boost::posix_time::ptime & s);
+}
+
+#endif
+
diff --git a/gentoobrowse-api/service/maintenanceChangeLogs.cpp b/gentoobrowse-api/service/maintenanceChangeLogs.cpp
new file mode 100644
index 0000000..46ef3e2
--- /dev/null
+++ b/gentoobrowse-api/service/maintenanceChangeLogs.cpp
@@ -0,0 +1,146 @@
+#include "maintenanceimpl.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "converters.h"
+
+namespace Gentoo {
+	namespace Service {
+		/// Throw GitError when a libgit2 call reports failure.
+		/// @param func return code of the libgit2 call; negative values mean failure.
+		/// @throws GitError with that code and the class/message from giterr_last().
+		static
+		void
+		gitSafe(int func)
+		{
+			if (func < 0) {
+				// giterr_last() returns null when no error detail was recorded
+				const git_error * e = giterr_last();
+				throw GitError(func, e ? e->klass : 0, e ? e->message : "Unknown libgit2 error");
+			}
+		}
+
+		/// git_diff_foreach file callback; appends the delta's path(s) to the list.
+		/// @param delta the file change being visited.
+		/// @param fileset the StringList * passed as the foreach payload.
+		/// @return 0 so that iteration continues.
+		static
+		int
+		onFile(const git_diff_delta * delta, float, void * fileset)
+		{
+			auto files = static_cast<StringList *>(fileset);
+			files->push_back(delta->old_file.path);
+			// Old and new path only differ for renames/copies; don't list a file twice
+			if (files->back() != delta->new_file.path) {
+				files->push_back(delta->new_file.path);
+			}
+			return 0;
+		}
+
+		/// Call a libgit2 function that creates an object and take ownership of the result.
+		/// @param get libgit2 function storing the new object through its first argument.
+		/// @param release libgit2 function that frees the object.
+		/// @param p remaining arguments passed to get.
+		/// @throws GitError if get fails.
+		template<typename R, typename ... P, typename ... A>
+		std::unique_ptr<R, void(*)(R*)>
+		gitSafeGet(int(*get)(R**, P...), void(*release)(R*), A ... p)
+		{
+			R * r = nullptr;
+			gitSafe(get(&r, p...));
+			return std::unique_ptr<R, void(*)(R*)>(r, release);
+		}
+
+		/// Import commits that are not yet in gentoobrowse.changelog, for every repository
+		/// returned by changeLogRoots.sql. Each walk starts at the configured
+		/// GentooBrowseAPI.ChangeLogStart.<repo> commit (HEAD when unset) and stops at the
+		/// newest commit already stored for that repository.
+		/// @throws GitError if a libgit2 operation fails.
+		void
+		Maintenance::refreshChangeLogs(const Ice::Current & c)
+		{
+			gitSafe(git_libgit2_init());
+			AdHoc::ScopeExit shutdownlibgit2(&git_libgit2_shutdown);
+
+			auto dbc = db->get();
+			DB::TransactionScope tx(dbc.get());
+			auto cli = dbc->modify(sql::maintenance::changeLogInsert.getSql());
+
+			// NOTE(review): column types reconstructed from how the lambda uses the values - confirm
+			dbc->select(sql::maintenance::changeLogRoots.getSql())->forEachRow<int64_t, std::string, std::string, boost::optional<std::string>>([&cli,&c](auto repoId, const auto & repoName, const auto & repoRoot, const auto & lastCommitId) {
+				cli->bindParamI(0, repoId);
+				// Open repository
+				auto repo = gitSafeGet(git_repository_open_ext, git_repository_free, repoRoot.c_str(), 0, nullptr);
+				// Set up walker
+				auto walker = gitSafeGet(git_revwalk_new, git_revwalk_free, repo.get());
+				auto startref = c.adapter->getCommunicator()->getProperties()
+					->getProperty("GentooBrowseAPI.ChangeLogStart." + repoName);
+				if (startref.empty()) {
+					gitSafe(git_revwalk_push_head(walker.get()));
+				}
+				else {
+					git_oid oid;
+					gitSafe(git_oid_fromstr(&oid, startref.c_str()));
+					gitSafe(git_revwalk_push(walker.get(), &oid));
+				}
+				git_revwalk_sorting(walker.get(), GIT_SORT_TIME);
+
+				git_oid oid;
+				char str[GIT_OID_HEXSZ + 1];
+				// Walk through revisions, newest first
+				int walkResult;
+				while ((walkResult = git_revwalk_next(&oid, walker.get())) == 0) {
+					git_oid_tostr(str, sizeof(str), &oid);
+					if (lastCommitId && *lastCommitId == str) {
+						// Reached the newest commit stored by a previous import
+						break;
+					}
+					// Get commit
+					auto commit = gitSafeGet(git_commit_lookup, git_commit_free, repo.get(), &oid);
+
+					// Get commit's tree
+					auto currentTree = gitSafeGet(git_commit_tree, git_tree_free, commit.get());
+
+					// Files are collected relative to the first parent; a root commit has no parent tree
+					std::unique_ptr<git_tree, void(*)(git_tree*)> parentTree(nullptr, git_tree_free);
+					if (git_commit_parentcount(commit.get()) > 0) {
+						auto parentCommit = gitSafeGet(git_commit_parent, git_commit_free, commit.get(), 0);
+						// Get parent tree
+						parentTree = gitSafeGet(git_commit_tree, git_tree_free, parentCommit.get());
+					}
+					// Get tree to tree diff (old side is the parent, new side is this commit)
+					auto diff = gitSafeGet(git_diff_tree_to_tree, git_diff_free, repo.get(), parentTree.get(), currentTree.get(), nullptr);
+					// Collect the changed paths; the file callback runs for every delta
+					StringList fs;
+					gitSafe(git_diff_foreach(diff.get(), onFile, nullptr, nullptr, nullptr, &fs));
+
+					// Insert commit into DB
+					cli->bindParamS(1, str);
+					// Committer time is what GIT_SORT_TIME orders the walk by and what
+					// changeLogRoots.sql sorts on to find the stop commit
+					cli->bindParamT(2, boost::posix_time::from_time_t(git_commit_time(commit.get())));
+					// Guard against a null summary; the summary column is NOT NULL
+					const char * summary = git_commit_summary(commit.get());
+					cli->bindParamS(3, summary ? summary : "");
+					Utils::Database::bindOptionalS(cli.get(), 4, git_commit_body(commit.get()));
+					auto sig = git_commit_author(commit.get());
+					cli->bindParamS(5, sig->name);
+					cli->bindParamS(6, sig->email);
+					cli->bindParamS(7, Slicer::packPqTextArray(fs));
+					cli->execute();
+				}
+				// GIT_ITEROVER marks the normal end of the walk; any other non-zero code is an error
+				if (walkResult != GIT_ITEROVER) {
+					gitSafe(walkResult);
+				}
+			});
+		}
+	}
+}
+
diff --git a/gentoobrowse-api/service/maintenanceimpl.h b/gentoobrowse-api/service/maintenanceimpl.h
index 9bd451c..6206836 100644
--- a/gentoobrowse-api/service/maintenanceimpl.h
+++ b/gentoobrowse-api/service/maintenanceimpl.h
@@ -35,6 +35,7 @@ namespace Gentoo {
 				void refreshPackageTree(const Ice::Current &) override;
 				void refreshBugs(const Ice::Current &) override;
+				void refreshChangeLogs(const Ice::Current &) override;
 				void associateBugs(DB::Connection *);
 			private:
diff --git a/gentoobrowse-api/service/sql/maintenance/changeLogInsert.sql b/gentoobrowse-api/service/sql/maintenance/changeLogInsert.sql
new file mode 100644
index 0000000..7a6c00a
--- /dev/null
+++ b/gentoobrowse-api/service/sql/maintenance/changeLogInsert.sql
@@ -0,0 +1,2 @@
+INSERT INTO gentoobrowse.changelog(repoid, commitid, committime, summary, body, authorname, authoremail, files)
+VALUES(?, ?, ?, ?, ?, ?, ?, ?)
diff --git a/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql b/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql
new file mode 100644
index 0000000..ce5d3a3
--- /dev/null
+++ b/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql
@@ -0,0 +1,8 @@
+SELECT r.repoid, r.name, r.path, cl.commitid
+FROM gentoobrowse.repos r
+	LEFT JOIN LATERAL (
+		SELECT commitid
+		FROM gentoobrowse.changelog cl
+		WHERE cl.repoid = r.repoid
+		ORDER BY committime DESC
+		limit 1) cl ON TRUE
diff --git a/gentoobrowse-api/service/utils/dbUtils.cpp b/gentoobrowse-api/service/utils/dbUtils.cpp
index ae07ff4..8f3ebf2 100644
--- a/gentoobrowse-api/service/utils/dbUtils.cpp
+++ b/gentoobrowse-api/service/utils/dbUtils.cpp
@@ -32,6 +32,17 @@ namespace Gentoo {
 				}
 			}
+			void
+			bindOptionalS(DB::Command * db, unsigned int c, const char * v)
+			{
+				// A null pointer is bound as SQL NULL, anything else as a string
+				if (!v) {
+					db->bindNull(c);
+					return;
+				}
+				db->bindParamS(c, v);
+			}
+
 			std::atomic tempTableNumber;
 			std::string
 			tempTableName()
diff --git a/gentoobrowse-api/service/utils/dbUtils.h b/gentoobrowse-api/service/utils/dbUtils.h
index aa46c33..bce7f38 100644
--- a/gentoobrowse-api/service/utils/dbUtils.h
+++ b/gentoobrowse-api/service/utils/dbUtils.h
@@ -12,6 +12,7 @@ namespace Gentoo {
 		namespace Database {
 			bool bindOptionalsS(DB::Command * db, unsigned int c, const std::vector > & vs);
 			void bindOptionalS(DB::Command * db, unsigned int c, const IceUtil::Optional & v);
+			void bindOptionalS(DB::Command * db, unsigned int c, const char * v);
 			std::string createTempWith(DB::Connection *, const std::string &, const std::set & keys = std::set());
 			std::string emptyClone(DB::Connection *, const std::string &);
diff --git a/gentoobrowse-api/unittests/Jamfile.jam b/gentoobrowse-api/unittests/Jamfile.jam
index 465160a..97f5c45 100644
--- a/gentoobrowse-api/unittests/Jamfile.jam
+++ b/gentoobrowse-api/unittests/Jamfile.jam
@@ -57,6 +57,14 @@ run
 	testCommon
 	: testDependAll ;
+run
+	testChangeLog.cpp
+	: : :
+	../db/schema.sql
+	
BOOST_TEST_DYN_LINK
+	testCommon
+	: testChangeLog ;
+
 run
 	testNews.cpp ../service/utils/fileUtils.cpp ../service/news.cpp ../service/utils/lexer.cpp
 	: : :
diff --git a/gentoobrowse-api/unittests/mockDefs.cpp b/gentoobrowse-api/unittests/mockDefs.cpp
index 2d89fab..3d53b42 100644
--- a/gentoobrowse-api/unittests/mockDefs.cpp
+++ b/gentoobrowse-api/unittests/mockDefs.cpp
@@ -9,7 +9,10 @@ Service::Service() :
 }
 Maintenance::Maintenance() :
-	IceTray::DryIce({ "--GentooBrowseAPI.BugRoot=file://" + (rootDir / "fixtures" / "bugs").string() }),
+	IceTray::DryIce({
+		"--GentooBrowseAPI.ChangeLogStart.gentoo=f8e9b2f0044232213a6e0299a3f31752e8dd9d24", // commit the "gentoo" repo walk starts from instead of HEAD
+		"--GentooBrowseAPI.BugRoot=file://" + (rootDir / "fixtures" / "bugs").string()
+	}),
 	PQ::Mock("user=postgres dbname=postgres", "GentooBrowseAPI", {
 			rootDir.parent_path() / "db" / "schema.sql",
 			rootDir / "basedata.sql" })
diff --git a/gentoobrowse-api/unittests/testChangeLog.cpp b/gentoobrowse-api/unittests/testChangeLog.cpp
new file mode 100644
index 0000000..412b328
--- /dev/null
+++ b/gentoobrowse-api/unittests/testChangeLog.cpp
@@ -0,0 +1,32 @@
+#define BOOST_TEST_MODULE TestChangeLog
+#include
+
+#include "mockDefs.h"
+#include
+#include
+
+BOOST_GLOBAL_FIXTURE( Maintenance );
+
+BOOST_FIXTURE_TEST_SUITE(tp, TestClient)
+
+BOOST_AUTO_TEST_CASE( importChangeLogs ) // Registers a repo and a stop commit, runs the import, then checks the row count
+{
+	auto db = DB::ConnectionPtr(DB::MockDatabase::openConnectionTo("GentooBrowseAPI"));
+	auto insRepo = db->modify("INSERT INTO gentoobrowse.repos(name, path) VALUES(?, ?)");
+	insRepo->bindParamS(0, "gentoo");
+	insRepo->bindParamS(1, "/usr/portage"); // assumes a git checkout of the Gentoo tree exists at this path - TODO confirm
+	insRepo->execute();
+	insRepo = db->modify("INSERT INTO gentoobrowse.changelog(repoid, commitid, committime, authorname, authoremail, summary, files) VALUES(1, ?, NOW(), '', '', '', ARRAY[]::TEXT[])"); // repoid 1 is presumably the repo inserted above - verify
+	// This is an arbitrary commit from history, pre-inserted to test that the import does not record it again (i.e. the walk should stop here)
+	insRepo->bindParamS(0, "ce9d693cedc899ee85351e8bc41b7f85db6b0d39");
+	insRepo->execute();
+
+	const int64_t expectedLogCount = 125; // presumably the pre-inserted row plus the commits between the start and stop commits - verify
+	m->refreshChangeLogs();
+
+	SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.changelog", int64_t, expectedLogCount);
+	db->execute("COPY gentoobrowse.changelog TO '/tmp/changelog.tsv'"); // NOTE(review): looks like a debugging aid - dumps the table to a server-side file
+}
+
+BOOST_AUTO_TEST_SUITE_END();
+
diff --git a/gentoobrowse-api/util/update.cpp b/gentoobrowse-api/util/update.cpp
index 01fb263..e04392d 100644
--- a/gentoobrowse-api/util/update.cpp
+++ b/gentoobrowse-api/util/update.cpp
@@ -8,13 +8,14 @@ int
 main(int c, char ** v)
 {
 	std::string endpoint;
-	bool background, tree, bugs;
+	bool background, tree, bugs, changeLog;
 	po::options_description opts("Gentoo Browse Util::Update options");
 	opts.add_options()
 		("endpoint", po::value(&endpoint)->default_value("tcp -p 9001"), "Service endpoint")
 		("background,b", po::value(&background)->default_value(false)->zero_tokens(), "Background")
 		("bugs", po::value(&bugs)->default_value(true), "Update bugs")
 		("tree", po::value(&tree)->default_value(true), "Update tree")
+		("changeLog", po::value(&changeLog)->default_value(true), "Update change log")
 		("help,h", "Show help")
 		;
@@ -34,6 +35,7 @@ main(int c, char ** v)
 	std::set jobs;
 	if (tree) jobs.insert(m->begin_refreshPackageTree());
 	if (bugs) jobs.insert(m->begin_refreshBugs());
+	if (changeLog) jobs.insert(m->begin_refreshChangeLogs());
 	std::for_each(jobs.begin(), jobs.end(), [](const auto & j) {
 		j->waitForSent();
 	});
-- 
cgit v1.2.3