summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan Goodliffe <dan@randomdan.homeip.net> 2016-10-14 01:58:19 +0100
committer: Dan Goodliffe <dan@randomdan.homeip.net> 2016-10-14 01:58:19 +0100
commit: 7567f3f24c17dde282a09235a9c43bfc979d17c4 (patch)
tree: fc5043b6059001c0512376406467f6ece743c353
parent: Migrate file utils from gentoobrowse-api (diff)
download: libadhocutil-7567f3f24c17dde282a09235a9c43bfc979d17c4.tar.bz2
libadhocutil-7567f3f24c17dde282a09235a9c43bfc979d17c4.tar.xz
libadhocutil-7567f3f24c17dde282a09235a9c43bfc979d17c4.zip
Migrate extensible lexer from gentoobrowse-api
-rw-r--r-- libadhocutil/Jamfile.jam 1
-rw-r--r-- libadhocutil/lexer-regex.cpp 78
-rw-r--r-- libadhocutil/lexer-regex.h 13
-rw-r--r-- libadhocutil/lexer.cpp 78
-rw-r--r-- libadhocutil/lexer.h 61
-rw-r--r-- libadhocutil/unittests/Jamfile.jam 11
-rw-r--r-- libadhocutil/unittests/testLexer.cpp 80
7 files changed, 322 insertions, 0 deletions
diff --git a/libadhocutil/Jamfile.jam b/libadhocutil/Jamfile.jam
index e0504d2..5793a8d 100644
--- a/libadhocutil/Jamfile.jam
+++ b/libadhocutil/Jamfile.jam
@@ -21,6 +21,7 @@ lib adhocutil :
<library>boost_filesystem
<library>boost_thread
<library>curl
+ <library>..//glibmm
<library>dl
: :
<include>.
diff --git a/libadhocutil/lexer-regex.cpp b/libadhocutil/lexer-regex.cpp
new file mode 100644
index 0000000..9ffee8b
--- /dev/null
+++ b/libadhocutil/lexer-regex.cpp
@@ -0,0 +1,78 @@
+#include "lexer-regex.h"
+
+namespace AdHoc {
+ namespace LexerMatchers {
+ /// Lexer pattern backed by a GLib GRegex.
+ ///
+ /// The result of the most recent matches() call is cached in mutable
+ /// members so that the const Pattern interface can report on it afterwards.
+ class Regex : public Lexer::Pattern {
+     public:
+         /// Compiles @p pattern using the supplied compile and match flags.
+         /// @throws std::runtime_error if g_regex_new fails to compile the pattern.
+         Regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match) :
+             err(nullptr),
+             regex(g_regex_new(pattern.c_str(), compile, match, &err)),
+             info(nullptr),
+             str(nullptr)
+         {
+             if (!regex) {
+                 std::runtime_error e(std::string("Failed to create GRegex: ") + err->message);
+                 g_error_free(err);
+                 err = nullptr;
+                 throw e;
+             }
+         }
+
+         // The object owns raw GLib handles; copying would free them twice.
+         Regex(const Regex &) = delete;
+         Regex & operator=(const Regex &) = delete;
+
+         ~Regex()
+         {
+             if (err) {
+                 g_error_free(err);
+             }
+             if (info) {
+                 g_match_info_free(info);
+             }
+             g_regex_unref(regex);
+         }
+
+         /// Attempts an anchored match of the regex against @p string at @p position.
+         /// @param string   input buffer (remembered for later match() calls).
+         /// @param length   length of @p string as passed to g_regex_match_full.
+         /// @param position offset in @p string at which the match must start.
+         /// @return true if the regex matched at @p position.
+         /// @throws std::runtime_error if g_regex_match_full reports an error.
+         bool matches(const gchar * string, size_t length, size_t position) const override
+         {
+             // Drop the previous result before asking GLib for a new one.
+             if (info) {
+                 g_match_info_free(info);
+                 info = nullptr;
+             }
+             g_regex_match_full(regex, string, length, position, G_REGEX_MATCH_ANCHORED, &info, &err);
+             if (err) {
+                 std::runtime_error e(std::string("Failed to execute regex: ") + err->message);
+                 g_error_free(err);
+                 // Reset so neither the destructor nor the next call sees a dangling error.
+                 err = nullptr;
+                 throw e;
+             }
+             str = string;
+             return g_match_info_matches(info);
+         }
+
+         /// Length of the whole match (group 0) from the last matches() call,
+         /// or 0 if there is no match result to report on.
+         size_t matchedLength() const override
+         {
+             gint start = 0;
+             gint end = 0;
+             if (!info || !g_match_info_fetch_pos(info, 0, &start, &end)) {
+                 return 0;
+             }
+             return end - start;
+         }
+
+         /// Text of capture group @p n from the last matches() call.
+         /// @return an empty optional if there is no match result, the group
+         ///         does not exist, or the group did not participate in the match.
+         boost::optional<Glib::ustring> match(int n) const override
+         {
+             gint start = -1;
+             gint end = -1;
+             if (!info || !g_match_info_fetch_pos(info, n, &start, &end)) {
+                 return boost::optional<Glib::ustring>();
+             }
+             if (start == -1 && end == -1) {
+                 return boost::optional<Glib::ustring>();
+             }
+             // GRegex offsets are byte offsets, whereas Glib::ustring(ptr, n)
+             // counts UTF-8 characters; build from a byte-sized std::string instead.
+             return Glib::ustring(std::string(str + start, end - start));
+         }
+
+     private:
+         mutable GError * err;
+         GRegex * regex;
+         mutable GMatchInfo * info;
+         mutable const gchar * str;
+ };
+
+ /// Creates a lexer pattern that matches input using a GLib regular
+ /// expression compiled from @p pattern with the given flags.
+ Lexer::PatternPtr
+ regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
+ {
+     Lexer::PatternPtr matcher(new Regex(pattern, compile, match));
+     return matcher;
+ }
+ }
+}
+
diff --git a/libadhocutil/lexer-regex.h b/libadhocutil/lexer-regex.h
new file mode 100644
index 0000000..2397071
--- /dev/null
+++ b/libadhocutil/lexer-regex.h
@@ -0,0 +1,13 @@
+#ifndef ADHOCUTIL_LEXER_REGEX_H
+#define ADHOCUTIL_LEXER_REGEX_H
+
+#include "lexer.h"
+
+namespace AdHoc {
+    namespace LexerMatchers {
+        /// Creates a Lexer pattern from a regular expression.
+        /// The first argument is the expression; @p compile and @p match are
+        /// the GRegex compile and match flags, both defaulting to no flags.
+        DLL_PUBLIC Lexer::PatternPtr regex(const Glib::ustring &, GRegexCompileFlags compile = static_cast<GRegexCompileFlags>(0), GRegexMatchFlags match = static_cast<GRegexMatchFlags>(0));
+    }
+}
+
+#endif
+
diff --git a/libadhocutil/lexer.cpp b/libadhocutil/lexer.cpp
new file mode 100644
index 0000000..95e079b
--- /dev/null
+++ b/libadhocutil/lexer.cpp
@@ -0,0 +1,78 @@
+#include "lexer.h"
+
+namespace AdHoc {
+ // The state pushed onto a new ExecuteState's stack: the empty string.
+ const Lexer::State Lexer::InitialState = "";
+
+ // Creates a lexer with no rules; callers add to `rules` afterwards.
+ Lexer::Lexer() = default;
+
+ // Creates a lexer holding a copy of the supplied rules.
+ Lexer::Lexer(const Rules & initialRules) :
+     rules(initialRules)
+ {
+ }
+
+ /// Runs the lexer over the first @p length gchars of @p string.
+ ///
+ /// At each position the rules are tried in order; the first rule whose
+ /// state set contains the current state and whose pattern matches at the
+ /// current position is selected. Its handler is called with the execution
+ /// state, and the position then advances by the pattern's matched length.
+ ///
+ /// @throws std::runtime_error if no rule matches at the current position.
+ void
+ Lexer::extract(const gchar * string, size_t length) const
+ {
+     ExecuteState es;
+     while (es.position < length) {
+         const Rule * selected = nullptr;
+         for (const auto & r : rules) {
+             const auto & states = boost::get<0>(r);
+             if (states.find(es.getState()) == states.end()) {
+                 continue;
+             }
+             if (boost::get<1>(r)->matches(string, length, es.position)) {
+                 selected = &r;
+                 break;
+             }
+         }
+         if (!selected) {
+             // Bound the quoted input by length: the buffer is not required
+             // to be NUL-terminated, so it must not be read as a C string.
+             const std::string remaining(string + es.position, length - es.position);
+             throw std::runtime_error("Unexpected input in state (" + es.getState() + ") at " + remaining);
+         }
+         es.pattern = boost::get<1>(*selected);
+         const auto & handler = boost::get<2>(*selected);
+         handler(&es);
+         // The handler runs first so it can inspect the match via es.pattern.
+         // NOTE: a zero-length match makes no progress here unless the
+         // handler changes the state.
+         es.position += es.pattern->matchedLength();
+     }
+ }
+
+ // Starts at position 0 with a state stack holding only InitialState.
+ Lexer::ExecuteState::ExecuteState() :
+     position(0),
+     stateStack(1, InitialState)
+ {
+ }
+
+ /// Replaces the state on top of the stack with @p s; depth is unchanged.
+ /// @throws std::runtime_error if every state has been popped.
+ void
+ Lexer::ExecuteState::setState(const State & s)
+ {
+     // back() on an empty vector is undefined behaviour.
+     if (stateStack.empty()) {
+         throw std::runtime_error("Cannot set lexer state: state stack is empty");
+     }
+     stateStack.back() = s;
+ }
+
+ // Enters a nested state: newState becomes the current state and the
+ // previous one stays beneath it on the stack.
+ void
+ Lexer::ExecuteState::pushState(const State & newState)
+ {
+     stateStack.emplace_back(newState);
+ }
+
+ /// Removes the state on top of the stack, making the one beneath it current.
+ /// @throws std::runtime_error if the stack is already empty.
+ void
+ Lexer::ExecuteState::popState()
+ {
+     // pop_back() on an empty vector is undefined behaviour.
+     if (stateStack.empty()) {
+         throw std::runtime_error("Cannot pop lexer state: state stack is empty");
+     }
+     stateStack.pop_back();
+ }
+
+ /// Returns the state on top of the stack.
+ /// @throws std::runtime_error if every state has been popped.
+ const Lexer::State &
+ Lexer::ExecuteState::getState() const
+ {
+     // back() on an empty vector is undefined behaviour.
+     if (stateStack.empty()) {
+         throw std::runtime_error("Cannot get lexer state: state stack is empty");
+     }
+     return stateStack.back();
+ }
+
+ // Reports how many states are currently on the stack.
+ size_t
+ Lexer::ExecuteState::depth() const
+ {
+     const size_t levels = stateStack.size();
+     return levels;
+ }
+}
+
diff --git a/libadhocutil/lexer.h b/libadhocutil/lexer.h
new file mode 100644
index 0000000..fdc4507
--- /dev/null
+++ b/libadhocutil/lexer.h
@@ -0,0 +1,61 @@
+#ifndef ADHOCUTIL_LEXER_H
+#define ADHOCUTIL_LEXER_H
+
+#include <vector>
+#include <glibmm/ustring.h>
+#include <set>
+#include <boost/tuple/tuple.hpp>
+#include <boost/function.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/optional.hpp>
+#include "visibility.h"
+
+namespace AdHoc {
+ /// A rule-driven lexer: each rule pairs a set of states and a pattern
+ /// with a handler to call when the pattern matches in one of those states.
+ class DLL_PUBLIC Lexer {
+     public:
+         /// Interface for anything that can be matched against the input.
+         class Pattern {
+             public:
+                 virtual ~Pattern() = default;
+
+                 /// Tests for a match; extract() passes the input, its
+                 /// length and the current position, in that order.
+                 virtual bool matches(const gchar *, size_t, size_t) const = 0;
+                 /// How far extract() advances after this pattern matched.
+                 virtual size_t matchedLength() const = 0;
+                 /// Optional matched text selected by the integer argument.
+                 virtual boost::optional<Glib::ustring> match(int) const = 0;
+         };
+         using PatternPtr = boost::shared_ptr<Pattern>;
+
+         using State = std::string;
+         using States = std::set<State>;
+
+         /// Mutable state of one extract() run, handed to every handler.
+         class ExecuteState {
+             public:
+                 ExecuteState();
+
+                 void pushState(const State &);
+                 void popState();
+                 void setState(const State &);
+                 const State & getState() const;
+                 size_t depth() const;
+
+                 /// Current offset into the input.
+                 size_t position;
+                 /// Pattern of the rule selected for the current position.
+                 PatternPtr pattern;
+
+             private:
+                 std::vector<State> stateStack;
+         };
+
+         using Handler = boost::function<void(ExecuteState *)>;
+         using Rule = boost::tuple<States, PatternPtr, Handler>;
+         using Rules = std::vector<Rule>;
+
+         /// State that every ExecuteState starts in.
+         static const State InitialState;
+         Lexer();
+         Lexer(const Rules &);
+
+         /// Rules in priority order: extract() uses the first that matches.
+         Rules rules;
+
+         /// Applies the rules to the first `length` gchars of `string`.
+         void extract(const gchar * string, size_t length) const;
+ };
+}
+
+#endif
+
diff --git a/libadhocutil/unittests/Jamfile.jam b/libadhocutil/unittests/Jamfile.jam
index 36250dc..b27f1d6 100644
--- a/libadhocutil/unittests/Jamfile.jam
+++ b/libadhocutil/unittests/Jamfile.jam
@@ -249,3 +249,14 @@ run
<library>boost_filesystem
;
+# Build and run the lexer unit tests in testLexer.cpp, linked against adhocutil.
+# NOTE(review): boost_utf is presumably the Boost unit test framework target - confirm.
+run
+ testLexer.cpp
+ : : :
+ <define>BOOST_TEST_DYN_LINK
+ <library>..//adhocutil
+ <library>boost_utf
+ <define>ROOT=\"$(me)\"
+ <library>boost_system
+ <library>boost_filesystem
+ ;
+
diff --git a/libadhocutil/unittests/testLexer.cpp b/libadhocutil/unittests/testLexer.cpp
new file mode 100644
index 0000000..c0b973b
--- /dev/null
+++ b/libadhocutil/unittests/testLexer.cpp
@@ -0,0 +1,80 @@
+#define BOOST_TEST_MODULE Lexer
+#include <boost/test/unit_test.hpp>
+
+#include <lexer.h>
+#include <lexer-regex.h>
+
+using namespace AdHoc;
+using namespace AdHoc::LexerMatchers;
+
+// A default-constructed lexer accepts rules appended to it afterwards.
+BOOST_AUTO_TEST_CASE( defaultConstructor )
+{
+    AdHoc::Lexer lexer;
+    lexer.rules.push_back({ { AdHoc::Lexer::InitialState }, regex("a"), [](auto) { } });
+}
+
+// One rule in the initial state: the handler fires once per matched "a",
+// and input that no rule matches raises std::runtime_error.
+BOOST_AUTO_TEST_CASE( simple )
+{
+    int hits = 0;
+    AdHoc::Lexer lexer({
+        { { AdHoc::Lexer::InitialState }, regex("a"), [&hits](auto) { hits += 1; } }
+    });
+    BOOST_REQUIRE_EQUAL(0, hits);
+    lexer.extract("aaaa", 4);
+    BOOST_REQUIRE_EQUAL(4, hits);
+    BOOST_REQUIRE_THROW(lexer.extract("abcd", 4), std::runtime_error);
+}
+
+// Exercises the state stack: the same pattern is bound to three different
+// states, so which handler runs for each "a" depends on the current state.
+BOOST_AUTO_TEST_CASE( state )
+{
+ int m = 0;
+ std::string s;
+ AdHoc::Lexer l({
+ // 1st "a": initial state, adds 1 and pushes state "2".
+ { { AdHoc::Lexer::InitialState }, regex("a"), [&](auto es)
+ {
+ m += 1;
+ BOOST_REQUIRE_EQUAL(1, es->depth());
+ es->pushState("2");
+ BOOST_REQUIRE_EQUAL(2, es->depth());
+ } },
+ // 2nd "a": state "2", adds 2 and pushes state "3".
+ { { "2" }, regex("a"), [&](auto es)
+ {
+ m += 2;
+ BOOST_REQUIRE_EQUAL("2", es->getState());
+ BOOST_REQUIRE_EQUAL(2, es->depth());
+ es->pushState("3");
+ BOOST_REQUIRE_EQUAL("3", es->getState());
+ BOOST_REQUIRE_EQUAL(3, es->depth());
+ } },
+ // 3rd and 4th "a": state "3", adds 3 and records the matched text.
+ // setState/popState/pushState leave the stack back in state "3" at depth 3.
+ { { "3" }, regex("a"), [&](auto es)
+ {
+ m += 3;
+ s += *es->pattern->match(0);
+ BOOST_REQUIRE_EQUAL(3, es->depth());
+ es->setState("4");
+ BOOST_REQUIRE_EQUAL(3, es->depth());
+ BOOST_REQUIRE_EQUAL("4", es->getState());
+ BOOST_REQUIRE_EQUAL(3, es->depth());
+ // The pattern has no capture groups, so groups 1 and 2 yield nothing.
+ BOOST_REQUIRE(!es->pattern->match(1));
+ BOOST_REQUIRE(!es->pattern->match(2));
+ es->popState();
+ BOOST_REQUIRE_EQUAL(2, es->depth());
+ BOOST_REQUIRE_EQUAL("2", es->getState());
+ es->pushState("3");
+ BOOST_REQUIRE_EQUAL(3, es->depth());
+ BOOST_REQUIRE_EQUAL("3", es->getState());
+ } }
+ });
+ BOOST_REQUIRE_EQUAL(0, m);
+ l.extract("aaaa", 4);
+ // 1 + 2 + 3 + 3 = 9; the state "3" handler ran twice, appending "a" each time.
+ BOOST_REQUIRE_EQUAL(9, m);
+ BOOST_REQUIRE_EQUAL("aa", s);
+}
+
+// Creating a pattern from an invalid regular expression must throw.
+BOOST_AUTO_TEST_CASE( badre )
+{
+ // "[" opens a character class that is never closed.
+ BOOST_REQUIRE_THROW(regex("["), std::runtime_error);
+}
+