summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Goodliffe <dan@randomdan.homeip.net>2016-05-02 19:47:29 +0100
committerDan Goodliffe <dan@randomdan.homeip.net>2016-05-02 19:47:29 +0100
commitc64d0a45e7fd36794af24770747f9807990b5bbd (patch)
treed82da11f48ef132fb467b3c8d28cdc0551d760dc
parentProcess masks (diff)
downloadgentoobrowse-api-c64d0a45e7fd36794af24770747f9807990b5bbd.tar.bz2
gentoobrowse-api-c64d0a45e7fd36794af24770747f9807990b5bbd.tar.xz
gentoobrowse-api-c64d0a45e7fd36794af24770747f9807990b5bbd.zip
Add a flexible extensible lexer
-rw-r--r--gentoobrowse-api/service/utils/lexer.cpp137
-rw-r--r--gentoobrowse-api/service/utils/lexer.h62
2 files changed, 199 insertions, 0 deletions
diff --git a/gentoobrowse-api/service/utils/lexer.cpp b/gentoobrowse-api/service/utils/lexer.cpp
new file mode 100644
index 0000000..8ed83b8
--- /dev/null
+++ b/gentoobrowse-api/service/utils/lexer.cpp
@@ -0,0 +1,137 @@
+#include "lexer.h"
+#include <stdexcept>
+#include <string>
+
+namespace Gentoo {
+ namespace Utils {
+ // The state an ExecuteState starts in: its constructor pushes this (the empty string) onto the state stack.
+ const Lexer::State Lexer::InitialState = "";
+
+ /// Lexer::Pattern implementation backed by a GLib GRegex.
+ ///
+ /// The result of the most recent matches() call is cached in mutable members
+ /// so that matchedLength() and match() can be queried afterwards. The input
+ /// pointer handed to matches() is borrowed, not copied, so it must remain
+ /// valid for as long as match() may be called.
+ class Regex : public Lexer::Pattern {
+     public:
+         /// Compiles @p pattern with the supplied compile and match flags.
+         /// @throws std::runtime_error if GLib cannot compile the pattern.
+         Regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match) :
+             regex(nullptr),
+             info(nullptr),
+             str(nullptr)
+         {
+             // The error is local: nothing outside this constructor needs it, and
+             // keeping it out of the object avoids a stale pointer after it is freed.
+             GError * err = nullptr;
+             regex = g_regex_new(pattern.c_str(), compile, match, &err);
+             if (!regex) {
+                 std::runtime_error e(std::string("Failed to create GRegex: ") + (err ? err->message : "unknown error"));
+                 if (err) {
+                     g_error_free(err);
+                 }
+                 throw e;
+             }
+         }
+
+         // The object owns raw GLib handles; copying would release them twice.
+         Regex(const Regex &) = delete;
+         Regex & operator=(const Regex &) = delete;
+
+         /// Releases the cached match state (if any) and the compiled regex.
+         ~Regex()
+         {
+             if (info) {
+                 g_match_info_free(info);
+             }
+             g_regex_unref(regex);
+         }
+
+         /// Tries to match the regex anchored at byte offset @p position of
+         /// @p string (of @p length bytes), replacing any cached match state.
+         /// @return true if the regex matched at that position.
+         /// @throws std::runtime_error if GLib reports an error while matching.
+         bool matches(const gchar * string, size_t length, size_t position) const override
+         {
+             if (info) {
+                 g_match_info_free(info);
+                 // Never leave a freed pointer behind in case the call below does not replace it.
+                 info = nullptr;
+             }
+             GError * err = nullptr;
+             g_regex_match_full(regex, string, length, position, G_REGEX_MATCH_ANCHORED, &info, &err);
+             if (err) {
+                 std::runtime_error e(std::string("Failed to execute regex: ") + err->message);
+                 g_error_free(err);
+                 throw e;
+             }
+             str = string;
+             return g_match_info_matches(info);
+         }
+
+         /// @return the length in bytes of the whole of the last match, or 0
+         /// when there is no match state to read from.
+         size_t matchedLength() const override
+         {
+             gint start = 0;
+             gint end = 0;
+             if (!info || !g_match_info_fetch_pos(info, 0, &start, &end)) {
+                 return 0;
+             }
+             return end - start;
+         }
+
+         /// @return the text of sub-match @p n of the last match, or an empty
+         /// optional when that position cannot be fetched or is reported as -1.
+         boost::optional<Glib::ustring> match(int n) const override
+         {
+             gint start = -1;
+             gint end = -1;
+             if (!info || !g_match_info_fetch_pos(info, n, &start, &end)) {
+                 return boost::optional<Glib::ustring>();
+             }
+             if (start < 0 || end < 0) {
+                 return boost::optional<Glib::ustring>();
+             }
+             // GLib reports byte offsets, whereas Glib::ustring(const char *, n)
+             // counts UTF-8 characters; go through std::string to copy exactly
+             // the matched bytes.
+             return Glib::ustring(std::string(str + start, str + end));
+         }
+
+     private:
+         GRegex * regex;
+         mutable GMatchInfo * info;
+         mutable const gchar * str;
+ };
+
+ /// Factory for a regex-backed pattern: constructs a Regex from the given
+ /// pattern text and flags and hands it back as a shared PatternPtr.
+ Lexer::PatternPtr
+ Lexer::regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
+ {
+     PatternPtr compiled(new Regex(pattern, compile, match));
+     return compiled;
+ }
+
+ /// Runs the rules over the first @p length bytes of @p string.
+ ///
+ /// Starting at offset 0, the rules are tried in order; the first rule whose
+ /// state set contains the current state and whose pattern matches at the
+ /// current offset is selected. Its handler is called with the execution state
+ /// (whose `pattern` member is set to the matching pattern), after which the
+ /// offset advances by the pattern's matched length.
+ ///
+ /// Note that a rule matching zero bytes whose handler changes neither the
+ /// state nor the position makes no progress.
+ ///
+ /// @throws std::runtime_error if no applicable rule matches at some offset.
+ void
+ Lexer::extract(const gchar * string, size_t length) const
+ {
+     ExecuteState es;
+     while (es.position < length) {
+         const Rule * selected = nullptr;
+         for (const auto & r : rules) {
+             // Skip rules that are not active in the current state.
+             const auto & s = boost::get<0>(r);
+             if (s.find(es.getState()) == s.end()) {
+                 continue;
+             }
+             const auto & p = boost::get<1>(r);
+             if (p->matches(string, length, es.position)) {
+                 selected = &r;
+                 break;
+             }
+         }
+         if (!selected) {
+             // The input is length-delimited, so bound the excerpt explicitly
+             // rather than relying on a terminating NUL being present.
+             throw std::runtime_error(std::string("Unexpected input at ") + std::string(string + es.position, length - es.position));
+         }
+         es.pattern = boost::get<1>(*selected);
+         const auto & h = boost::get<2>(*selected);
+         h(&es);
+         es.position += es.pattern->matchedLength();
+     }
+ }
+
+ /// Begins at offset zero with InitialState as the sole entry on the state stack.
+ Lexer::ExecuteState::ExecuteState() :
+     position(0),
+     stateStack(1, InitialState)
+ {
+ }
+
+ /// Overwrites the state on top of the stack with @p s; the stack depth is unchanged.
+ void
+ Lexer::ExecuteState::setState(const State & s)
+ {
+     State & current = stateStack.back();
+     current = s;
+ }
+
+ /// Makes @p s the current state by placing a copy of it on top of the stack.
+ void
+ Lexer::ExecuteState::pushState(const State & s)
+ {
+     stateStack.emplace_back(s);
+ }
+
+ /// Discards the state on top of the stack, making the one beneath it current.
+ ///
+ /// Popping the only remaining entry leaves the stack empty; a state must be
+ /// pushed again before getState() or setState() is used.
+ ///
+ /// @throws std::runtime_error if the stack is already empty.
+ void
+ Lexer::ExecuteState::popState()
+ {
+     // pop_back() on an empty vector is undefined behaviour, so fail loudly instead.
+     if (stateStack.empty()) {
+         throw std::runtime_error("Lexer state stack underflow");
+     }
+     stateStack.pop_back();
+ }
+
+ /// Gives read-only access to the state currently on top of the stack.
+ const Lexer::State &
+ Lexer::ExecuteState::getState() const
+ {
+     return *stateStack.rbegin();
+ }
+ }
+}
diff --git a/gentoobrowse-api/service/utils/lexer.h b/gentoobrowse-api/service/utils/lexer.h
new file mode 100644
index 0000000..44d3d57
--- /dev/null
+++ b/gentoobrowse-api/service/utils/lexer.h
@@ -0,0 +1,62 @@
+#ifndef GENTOOBROWSE_SERVICE_UTILS_LEXER_H
+#define GENTOOBROWSE_SERVICE_UTILS_LEXER_H
+
+#include <vector>
+#include <glibmm/ustring.h>
+#include <set>
+#include <boost/tuple/tuple.hpp>
+#include <boost/function.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/optional.hpp>
+
+namespace Gentoo {
+ namespace Utils {
+ /// A lexer driven by a list of rules, each of which ties a set of states and
+ /// a pattern to a handler.
+ class Lexer {
+     public:
+         /// Interface for anything that can be tested against the input.
+         class Pattern {
+             public:
+                 virtual ~Pattern() = default;
+
+                 /// Tests the pattern; Lexer::extract passes the input, its
+                 /// length and the current position, in that order.
+                 virtual bool matches(const gchar *, size_t, size_t) const = 0;
+                 /// Lexer::extract advances its position by this amount after
+                 /// the selected rule's handler has run.
+                 virtual size_t matchedLength() const = 0;
+                 /// An optional piece of matched text selected by index.
+                 virtual boost::optional<Glib::ustring> match(int) const = 0;
+         };
+         using PatternPtr = boost::shared_ptr<Pattern>;
+
+         using State = std::string;
+         using States = std::set<State>;
+
+         /// The mutable state of one extract() run, passed to every handler.
+         class ExecuteState {
+             public:
+                 ExecuteState();
+
+                 void pushState(const State &);
+                 void popState();
+                 void setState(const State &);
+                 const State & getState() const;
+
+                 /// Current offset into the input.
+                 size_t position;
+                 /// The pattern of the rule currently being handled.
+                 PatternPtr pattern;
+
+             private:
+                 std::vector<State> stateStack;
+         };
+
+         using Handler = boost::function<void(ExecuteState *)>;
+         /// (states in which the rule applies, pattern to test, handler to call)
+         using Rule = boost::tuple<States, PatternPtr, Handler>;
+         using Rules = std::vector<Rule>;
+
+         static const State InitialState;
+         /// Rules in priority order: extract() selects the first applicable match.
+         Rules rules;
+
+         /// Creates a regex-backed pattern; both flag arguments default to zero.
+         static PatternPtr regex(const Glib::ustring &, GRegexCompileFlags compile = static_cast<GRegexCompileFlags>(0), GRegexMatchFlags match = static_cast<GRegexMatchFlags>(0));
+
+         /// Applies the rules to the given input of the given length.
+         void extract(const gchar * string, size_t length) const;
+ };
+
+ }
+}
+
+#endif
+