From 657376f57a45e1b243346441a35e8eb3f3fc8eb0 Mon Sep 17 00:00:00 2001
From: Dan Goodliffe <dan@randomdan.homeip.net>
Date: Mon, 13 Jun 2016 21:18:04 +0100
Subject: Import all commits regardless of age

---
 gentoobrowse-api/service/maintenanceChangeLogs.cpp        | 15 ++++++++++++---
 .../service/sql/maintenance/changeLogRepoCommits.sql      |  3 +++
 .../service/sql/maintenance/changeLogRoots.sql            |  8 +-------
 gentoobrowse-api/unittests/mockDefs.cpp                   |  2 +-
 gentoobrowse-api/unittests/testChangeLog.cpp              | 10 ++++------
 5 files changed, 21 insertions(+), 17 deletions(-)
 create mode 100644 gentoobrowse-api/service/sql/maintenance/changeLogRepoCommits.sql

diff --git a/gentoobrowse-api/service/maintenanceChangeLogs.cpp b/gentoobrowse-api/service/maintenanceChangeLogs.cpp
index 027429e..4a6a278 100644
--- a/gentoobrowse-api/service/maintenanceChangeLogs.cpp
+++ b/gentoobrowse-api/service/maintenanceChangeLogs.cpp
@@ -8,6 +8,7 @@
 #include <scopeExit.h>
 #include <sql/maintenance/changeLogRoots.sql.h>
 #include <sql/maintenance/changeLogInsert.sql.h>
+#include <sql/maintenance/changeLogRepoCommits.sql.h>
 #include <portage-models.h>
 #include "converters.h"
 
@@ -60,7 +61,13 @@ namespace Gentoo {
 			DB::TransactionScope tx(dbc.get());
 			auto cli = dbc->modify(sql::maintenance::changeLogInsert.getSql());
 
-			dbc->select(sql::maintenance::changeLogRoots.getSql())->forEachRow<int64_t, std::string, std::string, boost::optional<std::string>>([&cli,&c](auto repoId, const auto & repoName, const auto & repoRoot, const auto & lastCommitId) {
+			dbc->select(sql::maintenance::changeLogRoots.getSql())->forEachRow<int64_t, std::string, std::string>([&cli,&c,&dbc](auto repoId, const auto & repoName, const auto & repoRoot) {
+					std::set<std::string> processedChanges;
+					{
+						auto changes = dbc->select(sql::maintenance::changeLogRepoCommits.getSql());
+						changes->bindParamI(0, repoId);
+						changes->forEachRow<std::string>([&processedChanges](const auto & c) { processedChanges.insert(c); });
+					}
 					cli->bindParamI(0, repoId);
 					// Open repository
 					auto repo = gitSafeGet(git_repository_open_ext, git_repository_free, repoRoot.c_str(), 0, nullptr);
@@ -83,8 +90,10 @@ namespace Gentoo {
 					// Walk through revisions
 					for (; !git_revwalk_next(&oid, walker.get()); ) {
 						git_oid_tostr(str, sizeof(str), &oid);
-						if (lastCommitId && *lastCommitId == str) {
-							break;
+						auto i = processedChanges.find(str);
+						if (i != processedChanges.end()) {
+							processedChanges.erase(i);
+							continue;
 						}
 						// Get commit
 						auto commit = gitSafeGet(git_commit_lookup, git_commit_free, repo.get(), &oid);
diff --git a/gentoobrowse-api/service/sql/maintenance/changeLogRepoCommits.sql b/gentoobrowse-api/service/sql/maintenance/changeLogRepoCommits.sql
new file mode 100644
index 0000000..99f5db8
--- /dev/null
+++ b/gentoobrowse-api/service/sql/maintenance/changeLogRepoCommits.sql
@@ -0,0 +1,3 @@
+SELECT cl.commitid
+FROM gentoobrowse.changelog cl
+WHERE cl.repoid = ?
diff --git a/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql b/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql
index ce5d3a3..ac05611 100644
--- a/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql
+++ b/gentoobrowse-api/service/sql/maintenance/changeLogRoots.sql
@@ -1,8 +1,2 @@
-SELECT r.repoid, r.name, r.path, cl.commitid
+SELECT r.repoid, r.name, r.path
 FROM gentoobrowse.repos r
-	LEFT JOIN LATERAL (
-		SELECT commitid
-		FROM gentoobrowse.changelog cl
-		WHERE cl.repoid = r.repoid
-		ORDER BY committime DESC
-		limit 1) cl ON TRUE
diff --git a/gentoobrowse-api/unittests/mockDefs.cpp b/gentoobrowse-api/unittests/mockDefs.cpp
index 3d53b42..91b8164 100644
--- a/gentoobrowse-api/unittests/mockDefs.cpp
+++ b/gentoobrowse-api/unittests/mockDefs.cpp
@@ -10,7 +10,7 @@ Service::Service() :
 
 Maintenance::Maintenance() :
 	IceTray::DryIce({
-			"--GentooBrowseAPI.ChangeLogStart.gentoo=f8e9b2f0044232213a6e0299a3f31752e8dd9d24",
+			"--GentooBrowseAPI.ChangeLogStart.gentoo=68190573ce9846f82b8cb4e8a6376758c8257184",
 			"--GentooBrowseAPI.BugRoot=file://" + (rootDir / "fixtures" / "bugs").string()
 		}),
 	PQ::Mock("user=postgres dbname=postgres", "GentooBrowseAPI", {
diff --git a/gentoobrowse-api/unittests/testChangeLog.cpp b/gentoobrowse-api/unittests/testChangeLog.cpp
index df01a7b..6cd814a 100644
--- a/gentoobrowse-api/unittests/testChangeLog.cpp
+++ b/gentoobrowse-api/unittests/testChangeLog.cpp
@@ -16,17 +16,15 @@ BOOST_AUTO_TEST_CASE( importChangeLogs )
 	insRepo->bindParamS(0, "gentoo");
 	insRepo->bindParamS(1, "/usr/portage");
 	insRepo->execute();
-	insRepo = db->modify("INSERT INTO gentoobrowse.changelog(repoid, commitid, committime, authorname, authoremail, summary, files) VALUES(1, ?, NOW(), '', '', '', ARRAY[]::TEXT[])");
-	// this an arbitrary commit from history to test we don't report things again (i.e. we should stop here)
-	insRepo->bindParamS(0, "ce9d693cedc899ee85351e8bc41b7f85db6b0d39");
-	insRepo->execute();
 
 	const int64_t expectedLogCount = 125;
-	const int64_t expectedFilesCount = 9;
+	const int64_t expectedFilesCommitCount = 1;
+	const int64_t expectedFilesCount = 2;
 	m->refreshChangeLogs();
 
 	SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.changelog", int64_t, expectedLogCount);
-	SQL_REQUIRE_EQUAL("SELECT ARRAY_LENGTH(files, 1) FROM gentoobrowse.changelog WHERE commitid = 'f8e9b2f0044232213a6e0299a3f31752e8dd9d24'", int64_t, expectedFilesCount);
+	SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.changelog WHERE commitid = '599aeeacf88dbe805a8310ba1c4c9dd70aa2baf4'", int64_t, expectedFilesCommitCount);
+	SQL_REQUIRE_EQUAL("SELECT ARRAY_LENGTH(files, 1) FROM gentoobrowse.changelog WHERE commitid = '599aeeacf88dbe805a8310ba1c4c9dd70aa2baf4'", int64_t, expectedFilesCount);
 	db->execute("COPY gentoobrowse.changelog TO '/tmp/changelog.tsv'");
 }
 
-- 
cgit v1.2.3