From 7ed457ae9a9e42846fd019c45677d9ad12e5c870 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Sat, 15 Oct 2016 15:10:41 +0100 Subject: Replace bespoke parser for masks with an AdHoc Lexer --- gentoobrowse-api/db/schema.sql | 2 +- gentoobrowse-api/domain/portage-models.ice | 2 +- .../service/maintenance/masksProcessor.cpp | 85 ++++++++++++---------- gentoobrowse-api/unittests/testMaintenance.cpp | 10 ++- 4 files changed, 53 insertions(+), 46 deletions(-) diff --git a/gentoobrowse-api/db/schema.sql b/gentoobrowse-api/db/schema.sql index d4bbce6..8951297 100644 --- a/gentoobrowse-api/db/schema.sql +++ b/gentoobrowse-api/db/schema.sql @@ -424,7 +424,7 @@ ALTER TABLE license OWNER TO gentoo; CREATE TABLE masksets ( setno integer NOT NULL, person text, - email text NOT NULL, + email text, dateadded date NOT NULL, message text NOT NULL, atomspec text[] NOT NULL, diff --git a/gentoobrowse-api/domain/portage-models.ice b/gentoobrowse-api/domain/portage-models.ice index dd57551..5b7ddff 100644 --- a/gentoobrowse-api/domain/portage-models.ice +++ b/gentoobrowse-api/domain/portage-models.ice @@ -94,7 +94,7 @@ module Gentoo { class Mask { int setno; optional(0) string person; - string email; + optional(1) string email; [ "slicer:conversion:boost.posix_time.ptime:posixTimeToShortDate:shortDateToPosixTime" ] string dateadded; string message; diff --git a/gentoobrowse-api/service/maintenance/masksProcessor.cpp b/gentoobrowse-api/service/maintenance/masksProcessor.cpp index 1634167..484bceb 100644 --- a/gentoobrowse-api/service/maintenance/masksProcessor.cpp +++ b/gentoobrowse-api/service/maintenance/masksProcessor.cpp @@ -6,14 +6,19 @@ #include #include "utils/dbUtils.h" #include -#include +#include +#include +#include #include "sql/maintenance/masksSets.sql.h" #include "sql/maintenance/masksEbuilds.sql.h" -static Glib::RefPtr maskHead = Glib::Regex::create( - "^# ([^<]+)? ?<(.+?@[^>]+)> \\((\\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec) \\d+)\\)", - Glib::RegexCompileFlags::REGEX_CASELESS); -static Glib::RefPtr maskDesc = Glib::Regex::create("^# (.*)"); +static AdHoc::Lexer::PatternPtr maskHead = AdHoc::LexerMatchers::regex( + "^# ([^<\n]+)? ?(<(.+?@[^\n>]+)>?)? \\((\\d+ *(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\w* \\d+)\\)$\n", + (GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_CASELESS | G_REGEX_UNGREEDY | G_REGEX_MULTILINE)); +static AdHoc::Lexer::PatternPtr maskDesc = AdHoc::LexerMatchers::regex("^# *([^\n]*)$\n", G_REGEX_MULTILINE); +static AdHoc::Lexer::PatternPtr atom = AdHoc::LexerMatchers::regex("^([^\n/]+/[^\n]*)$\n?", G_REGEX_MULTILINE); +static AdHoc::Lexer::PatternPtr end = AdHoc::LexerMatchers::regex("^$\n", G_REGEX_MULTILINE); +static AdHoc::Lexer::PatternPtr discard = AdHoc::LexerMatchers::regex("^([^\n]*)$\n?", G_REGEX_MULTILINE); namespace Gentoo { namespace Service { @@ -37,45 +42,45 @@ namespace Gentoo { { "atomSpec", "text[]" } }); - auto iochannel = Glib::IOChannel::create_from_file(path.string(), "r"); - Glib::MatchInfo matchesHead, matchesDesc; + Glib::ustring date; + boost::optional person, email; std::set atoms; std::list message; - Glib::ustring line, person, email, date; - bool inMask = false; int n = 0; auto i = tempTable.second; - while (iochannel->read_line(line) == Glib::IO_STATUS_NORMAL) { - boost::algorithm::trim_right_if(line, &g_unichar_isspace); - if (maskHead->match(line, matchesHead) && matchesHead.get_match_count() == 4) { - // Mask header - inMask = true; - person = matchesHead.fetch(1); - email = matchesHead.fetch(2); - date = matchesHead.fetch(3); - } - else if (inMask && maskDesc->match(line, matchesDesc) && matchesDesc.get_match_count() == 2) { - // Mask message - message.push_back(line); - } - else if (inMask && !line.empty()) { - // Masked atom - atoms.insert(line); - } - else if (inMask && line.empty()) { - // End - inMask = false; - i->bindParamS(0, "{" + boost::algorithm::join(atoms, ",") + "}"); - i->bindParamS(1, date); - i->bindParamS(2, email); - i->bindParamS(3, boost::algorithm::join(message, " ")); - i->bindParamI(4, ++n); - i->bindParamS(5, person); - i->execute(); - atoms.clear(); - message.clear(); - } - } + AdHoc::FileUtils::MemMap f(path); + AdHoc::Lexer l({ + { { AdHoc::Lexer::InitialState }, maskHead, [&](auto e) + { + person = e->pattern()->match(1); + email = e->pattern()->match(3); + date = *e->pattern()->match(4); + e->pushState("mask"); + } }, + { { "mask" }, maskDesc, [&](auto e) + { + message.push_back(*e->pattern()->match(1)); + } }, + { { "mask" }, atom, [&](auto e) + { + atoms.insert(*e->pattern()->match(1)); + } }, + { { "mask" }, end, [&](auto e) + { + i->bindParamS(0, "{" + boost::algorithm::join(atoms, ",") + "}"); + i->bindParamS(1, date); + if (email) i->bindParamS(2, *email); else i->bindNull(2); + i->bindParamS(3, boost::algorithm::join(message, " ")); + i->bindParamI(4, ++n); + if (person) i->bindParamS(5, *person); else i->bindNull(5); + i->execute(); + atoms.clear(); + message.clear(); + e->popState(); + } }, + { { AdHoc::Lexer::InitialState }, discard, [&](auto) { } } + }); + l.extract((gchar *)f.data, f.getStat().st_size); DB::TablePatch p; p.dest = "gentoobrowse.masksets"; diff --git a/gentoobrowse-api/unittests/testMaintenance.cpp b/gentoobrowse-api/unittests/testMaintenance.cpp index a845b7c..5cceb2d 100644 --- a/gentoobrowse-api/unittests/testMaintenance.cpp +++ b/gentoobrowse-api/unittests/testMaintenance.cpp @@ -41,7 +41,8 @@ BOOST_FIXTURE_TEST_SUITE(tp, TestClient) void doRefreshPackageTree(SampleData & sd, DB::ConnectionPtr db, const std::string & archive, const std::string & dir, Gentoo::MaintenancePrx m, int64_t files, int64_t cats, int64_t devvcs, int64_t pkgs, int64_t ebs, int64_t ebus, - int64_t ebas, int64_t pus, int64_t ug, int64_t ul, int64_t ugs, int64_t ugds, int64_t deps, int64_t rdeps, int64_t news) + int64_t ebas, int64_t pus, int64_t ug, int64_t ul, int64_t ugs, int64_t ugds, int64_t deps, int64_t rdeps, + int64_t news, int64_t masks) { if (!archive.empty()) { sd.extract(archive, dir); @@ -64,6 +65,7 @@ doRefreshPackageTree(SampleData & sd, DB::ConnectionPtr db, const std::string & SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.ebuild_deps", int64_t, deps); SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.ebuild_rdeps", int64_t, rdeps); SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.news", int64_t, news); + SQL_REQUIRE_EQUAL("SELECT COUNT(*) FROM gentoobrowse.masksets", int64_t, masks); } void @@ -99,15 +101,15 @@ BOOST_AUTO_TEST_CASE( refreshPackageTree ) insRepo->execute(); doRefreshPackageTree(sd, db, "4156eb45cf3b0ce1d7125b84efd8688c2d6e831d", "gentoo", - m, 2084, 5, 1, 482, 981, 3626, 4593, 501, 393, 238, 50, 1573, 2008, 1543, 81); + m, 2084, 5, 1, 482, 981, 3626, 4593, 501, 393, 238, 50, 1573, 2008, 1543, 81, 152); dumpDb(db); doRefreshPackageTree(sd, db, "756569aa764177340726dd3d40b41d89b11b20c7", "gentoo", - m, 2087, 5, 1, 484, 982, 3638, 4599, 503, 393, 238, 50, 1573, 2009, 1546, 79); + m, 2087, 5, 1, 484, 982, 3638, 4599, 503, 393, 238, 50, 1573, 2009, 1546, 79, 152); dumpDb(db); doRefreshPackageTree(sd, db, "", "gentoo", - m, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + m, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); m->refreshPackageTree(); } -- cgit v1.2.3