summary refs log tree commit diff
diff options
context:
space:
mode:
author Dan Goodliffe <dan@randomdan.homeip.net> 2016-10-15 15:10:41 +0100
committer Dan Goodliffe <dan@randomdan.homeip.net> 2016-10-15 15:10:41 +0100
commit c34f252737eb7ea5c2fcaf2de32366218b05f23e (patch)
tree aa1b43eda00f7ab8ca1c745ac0a735a01dc5917d
parent Only dump DB when asked for (diff)
download gentoobrowse-api-c34f252737eb7ea5c2fcaf2de32366218b05f23e.tar.bz2
gentoobrowse-api-c34f252737eb7ea5c2fcaf2de32366218b05f23e.tar.xz
gentoobrowse-api-c34f252737eb7ea5c2fcaf2de32366218b05f23e.zip
Replace bespoke parser for masks with an AdHoc Lexer
-rw-r--r-- gentoobrowse-api/db/schema.sql 2
-rw-r--r-- gentoobrowse-api/domain/portage-models.ice 2
-rw-r--r-- gentoobrowse-api/service/maintenance/masksProcessor.cpp 85
-rw-r--r-- gentoobrowse-api/unittests/testMaintenance.cpp 10
4 files changed, 53 insertions, 46 deletions
diff --git a/gentoobrowse-api/db/schema.sql b/gentoobrowse-api/db/schema.sql
index d4bbce6..8951297 100644
--- a/gentoobrowse-api/db/schema.sql
+++ b/gentoobrowse-api/db/schema.sql
@@ -424,7 +424,7 @@ ALTER TABLE license OWNER TO gentoo;
CREATE TABLE masksets (
setno integer NOT NULL,
person text,
- email text NOT NULL,
+ email text,
dateadded date NOT NULL,
message text NOT NULL,
atomspec text[] NOT NULL,
diff --git a/gentoobrowse-api/domain/portage-models.ice b/gentoobrowse-api/domain/portage-models.ice
index dd57551..5b7ddff 100644
--- a/gentoobrowse-api/domain/portage-models.ice
+++ b/gentoobrowse-api/domain/portage-models.ice
@@ -94,7 +94,7 @@ module Gentoo {
class Mask {
int setno;
optional(0) string person;
- string email;
+ optional(1) string email;
[ "slicer:conversion:boost.posix_time.ptime:posixTimeToShortDate:shortDateToPosixTime" ]
string dateadded;
string message;
diff --git a/gentoobrowse-api/service/maintenance/masksProcessor.cpp b/gentoobrowse-api/service/maintenance/masksProcessor.cpp
index 1634167..484bceb 100644
--- a/gentoobrowse-api/service/maintenance/masksProcessor.cpp
+++ b/gentoobrowse-api/service/maintenance/masksProcessor.cpp
@@ -6,14 +6,19 @@
#include <tablepatch.h>
#include "utils/dbUtils.h"
#include <glibmm/regex.h>
-#include <glibmm/iochannel.h>
+#include <fileUtils.h>
+#include <lexer-regex.h>
+#include <lexer.h>
#include "sql/maintenance/masksSets.sql.h"
#include "sql/maintenance/masksEbuilds.sql.h"
-static Glib::RefPtr<Glib::Regex> maskHead = Glib::Regex::create(
- "^# ([^<]+)? ?<(.+?@[^>]+)> \\((\\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec) \\d+)\\)",
- Glib::RegexCompileFlags::REGEX_CASELESS);
-static Glib::RefPtr<Glib::Regex> maskDesc = Glib::Regex::create("^# (.*)");
+static AdHoc::Lexer::PatternPtr maskHead = AdHoc::LexerMatchers::regex(
+ "^# ([^<\n]+)? ?(<(.+?@[^\n>]+)>?)? \\((\\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\w* \\d+)\\)$\n",
+ (GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_CASELESS | G_REGEX_UNGREEDY | G_REGEX_MULTILINE));
+static AdHoc::Lexer::PatternPtr maskDesc = AdHoc::LexerMatchers::regex("^# *([^\n]*)$\n", G_REGEX_MULTILINE);
+static AdHoc::Lexer::PatternPtr atom = AdHoc::LexerMatchers::regex("^([^\n/]+/[^\n]*)$\n?", G_REGEX_MULTILINE);
+static AdHoc::Lexer::PatternPtr end = AdHoc::LexerMatchers::regex("^$\n", G_REGEX_MULTILINE);
+static AdHoc::Lexer::PatternPtr discard = AdHoc::LexerMatchers::regex("^([^\n]*)$\n?", G_REGEX_MULTILINE);
namespace Gentoo {
namespace Service {
@@ -37,45 +42,45 @@ namespace Gentoo {
{ "atomSpec", "text[]" }
});
- auto iochannel = Glib::IOChannel::create_from_file(path.string(), "r");
- Glib::MatchInfo matchesHead, matchesDesc;
+ Glib::ustring date;
+ boost::optional<Glib::ustring> person, email;
std::set<std::string> atoms;
std::list<std::string> message;
- Glib::ustring line, person, email, date;
- bool inMask = false;
int n = 0;
auto i = tempTable.second;
- while (iochannel->read_line(line) == Glib::IO_STATUS_NORMAL) {
- boost::algorithm::trim_right_if(line, &g_unichar_isspace);
- if (maskHead->match(line, matchesHead) && matchesHead.get_match_count() == 4) {
- // Mask header
- inMask = true;
- person = matchesHead.fetch(1);
- email = matchesHead.fetch(2);
- date = matchesHead.fetch(3);
- }
- else if (inMask && maskDesc->match(line, matchesDesc) && matchesDesc.get_match_count() == 2) {
- // Mask message
- message.push_back(line);
- }
- else if (inMask && !line.empty()) {
- // Masked atom
- atoms.insert(line);
- }
- else if (inMask && line.empty()) {
- // End
- inMask = false;
- i->bindParamS(0, "{" + boost::algorithm::join(atoms, ",") + "}");
- i->bindParamS(1, date);
- i->bindParamS(2, email);
- i->bindParamS(3, boost::algorithm::join(message, " "));
- i->bindParamI(4, ++n);
- i->bindParamS(5, person);
- i->execute();
- atoms.clear();
- message.clear();
- }
- }
+ AdHoc::FileUtils::MemMap f(path);
+ AdHoc::Lexer l({
+ { { AdHoc::Lexer::InitialState }, maskHead, [&](auto e)
+ {
+ person = e->pattern()->match(1);
+ email = e->pattern()->match(3);
+ date = *e->pattern()->match(4);
+ e->pushState("mask");
+ } },
+ { { "mask" }, maskDesc, [&](auto e)
+ {
+ message.push_back(*e->pattern()->match(1));
+ } },
+ { { "mask" }, atom, [&](auto e)
+ {
+ atoms.insert(*e->pattern()->match(1));
+ } },
+ { { "mask" }, end, [&](auto e)
+ {
+ i->bindParamS(0, "{" + boost::algorithm::join(atoms, ",") + "}");
+ i->bindParamS(1, date);
+ if (email) i->bindParamS(2, *email); else i->bindNull(2);
+ i->bindParamS(3, boost::algorithm::join(message, " "));
+ i->bindParamI(4, ++n);
+ if (person) i->bindParamS(5, *person); else i->bindNull(5);
+ i->execute();
+ atoms.clear();
+ message.clear();
+ e->popState();
+ } },
+ { { AdHoc::Lexer::InitialState }, discard, [&](auto) { } }
+ });
+ l.extract((gchar *)f.data, f.getStat().st_size);
DB::TablePatch p;
p.dest = "gentoobrowse.masksets";
diff --git a/gentoobrowse-api/unittests/testMaintenance.cpp b/gentoobrowse-api/unittests/testMaintenance.cpp
index a845b7c..5cceb2d 100644
--- a/gentoobrowse-api/unittests/testMaintenance.cpp
+++ b/gentoobrowse-api/unittests/testMaintenance.cpp
@@ -41,7 +41,8 @@ BOOST_FIXTURE_TEST_SUITE(tp, TestClient)
void
doRefreshPackageTree(SampleData & sd, DB::ConnectionPtr db, const std::string & archive, const std::string & dir,
Gentoo::MaintenancePrx m, int64_t files, int64_t cats, int64_t devvcs, int64_t pkgs, int64_t ebs, int64_t ebus,
- int64_t ebas, int64_t pus, int64_t ug, int64_t ul, int64_t ugs, int64_t ugds, int64_t deps, int64_t rdeps, int64_t news)
+ int64_t ebas, int64_t pus, int64_t ug, int64_t ul, int64_t ugs, int64_t ugds, int64_t deps, int64_t rdeps,
+ int64_t news, int64_t masks)
{
if (!archive.empty()) {
sd.extract(archive, dir);
@@ -64,6 +65,7 @@ doRefreshPackageTree(SampleData & sd, DB::ConnectionPtr db, const std::string &
SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.ebuild_deps", int64_t, deps);
SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.ebuild_rdeps", int64_t, rdeps);
SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.news", int64_t, news);
+ SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.masksets", int64_t, masks);
}
void
@@ -99,15 +101,15 @@ BOOST_AUTO_TEST_CASE( refreshPackageTree )
insRepo->execute();
doRefreshPackageTree(sd, db, "4156eb45cf3b0ce1d7125b84efd8688c2d6e831d", "gentoo",
- m, 2084, 5, 1, 482, 981, 3626, 4593, 501, 393, 238, 50, 1573, 2008, 1543, 81);
+ m, 2084, 5, 1, 482, 981, 3626, 4593, 501, 393, 238, 50, 1573, 2008, 1543, 81, 152);
dumpDb(db);
doRefreshPackageTree(sd, db, "756569aa764177340726dd3d40b41d89b11b20c7", "gentoo",
- m, 2087, 5, 1, 484, 982, 3638, 4599, 503, 393, 238, 50, 1573, 2009, 1546, 79);
+ m, 2087, 5, 1, 484, 982, 3638, 4599, 503, 393, 238, 50, 1573, 2009, 1546, 79, 152);
dumpDb(db);
doRefreshPackageTree(sd, db, "", "gentoo",
- m, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ m, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
m->refreshPackageTree();
}