diff options
| -rw-r--r-- | libadhocutil/Jamfile.jam | 1 | ||||
| -rw-r--r-- | libadhocutil/lexer-regex.cpp | 114 | ||||
| -rw-r--r-- | libadhocutil/lexer-regex.h | 17 | ||||
| -rw-r--r-- | libadhocutil/lexer.cpp | 97 | ||||
| -rw-r--r-- | libadhocutil/lexer.h | 80 | ||||
| -rw-r--r-- | libadhocutil/unittests/Jamfile.jam | 11 | ||||
| -rw-r--r-- | libadhocutil/unittests/testLexer.cpp | 121 | 
7 files changed, 441 insertions, 0 deletions
diff --git a/libadhocutil/Jamfile.jam b/libadhocutil/Jamfile.jam
index e0504d2..5793a8d 100644
--- a/libadhocutil/Jamfile.jam
+++ b/libadhocutil/Jamfile.jam
@@ -21,6 +21,7 @@ lib adhocutil :
 	<library>boost_filesystem
 	<library>boost_thread
 	<library>curl
+	<library>..//glibmm
 	<library>dl
 	: :
 	<include>.
diff --git a/libadhocutil/lexer-regex.cpp b/libadhocutil/lexer-regex.cpp
new file mode 100644
index 0000000..9ffee8b
--- /dev/null
+++ b/libadhocutil/lexer-regex.cpp
@@ -0,0 +1,114 @@
+#include "lexer-regex.h"
+#include <stdexcept>
+#include <string>
+
+namespace AdHoc {
+	namespace LexerMatchers {
+		/// Lexer::Pattern implementation backed by a GLib GRegex.
+		///
+		/// matches() caches its result in the mutable members below, so
+		/// matchedLength() and match() describe the most recent matches() call.
+		/// The subject buffer is referenced, not copied: it must stay valid
+		/// for as long as match() may be called.
+		class Regex : public Lexer::Pattern {
+			public:
+				/// Compiles pattern with the given GRegex compile and match flags.
+				/// @throws std::runtime_error if the pattern fails to compile.
+				Regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match) :
+					err(nullptr),
+					regex(g_regex_new(pattern.c_str(), compile, match, &err)),
+					info(nullptr),
+					str(nullptr)
+				{
+					if (!regex) {
+						throwError("Failed to create GRegex: ");
+					}
+				}
+
+				// The GLib handles are owned by exactly one instance; a copy
+				// would release them twice.
+				Regex(const Regex &) = delete;
+				Regex & operator=(const Regex &) = delete;
+
+				~Regex()
+				{
+					g_clear_error(&err);
+					if (info) {
+						g_match_info_free(info);
+					}
+					g_regex_unref(regex);
+				}
+
+				/// Tries to match the pattern anchored at byte offset position of
+				/// string (length bytes long) and remembers the result.
+				/// @return true if the pattern matched at that offset.
+				/// @throws std::runtime_error if GLib reports a matching error.
+				bool matches(const gchar * string, size_t length, size_t position) const override
+				{
+					// Clear the stale result up front so info can never dangle.
+					if (info) {
+						g_match_info_free(info);
+						info = nullptr;
+					}
+					g_regex_match_full(regex, string, length, position, G_REGEX_MATCH_ANCHORED, &info, &err);
+					if (err) {
+						throwError("Failed to execute regex: ");
+					}
+					str = string;
+					return g_match_info_matches(info);
+				}
+
+				/// Length in bytes of the whole match found by the last matches()
+				/// call, or 0 when there is no match to report.
+				size_t matchedLength() const override
+				{
+					gint start = 0, end = 0;
+					// fetch_pos leaves its out-parameters untouched on failure, so
+					// bail out instead of computing a length from garbage.
+					if (!info || !g_match_info_fetch_pos(info, 0, &start, &end)) {
+						return 0;
+					}
+					return end - start;
+				}
+
+				/// Text captured by group n (0 = whole match) in the last matches()
+				/// call; empty optional if the group is absent or did not participate.
+				boost::optional<Glib::ustring> match(int n) const override
+				{
+					gint start = 0, end = 0;
+					if (!info || !g_match_info_fetch_pos(info, n, &start, &end)) {
+						return boost::optional<Glib::ustring>();
+					}
+					if (start == -1 && end == -1) {
+						return boost::optional<Glib::ustring>();
+					}
+					// start/end are byte offsets, but ustring(const char *, n) takes
+					// a character count; copy the exact byte range instead.
+					return Glib::ustring(std::string(str + start, str + end));
+				}
+
+			private:
+				/// Throws std::runtime_error(prefix + err->message) after releasing
+				/// err. Leaving err as nullptr matters: the destructor and the next
+				/// g_regex_match_full() call would otherwise see a freed pointer.
+				void throwError(const char * prefix) const
+				{
+					std::runtime_error e(std::string(prefix) + err->message);
+					g_clear_error(&err);
+					throw e;
+				}
+
+				mutable GError * err;
+				GRegex * regex;
+				mutable GMatchInfo * info;
+				mutable const gchar * str;
+		};
+
+		Lexer::PatternPtr
+		regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
+		{
+			return Lexer::PatternPtr(new Regex(pattern, compile, match));
+		}
+	}
+}
+
diff --git a/libadhocutil/lexer-regex.h b/libadhocutil/lexer-regex.h
new file mode 100644
index 0000000..2397071
--- /dev/null
+++ b/libadhocutil/lexer-regex.h
@@ -0,0 +1,17 @@
+#ifndef ADHOCUTIL_LEXER_REGEX_H
+#define ADHOCUTIL_LEXER_REGEX_H
+
+#include <glib.h>
+#include "lexer.h"
+
+namespace AdHoc {
+	namespace LexerMatchers {
+		/// Creates a Lexer pattern that matches input against a GLib regular
+		/// expression, anchored at the lexer's current position.
+		/// @throws std::runtime_error if the expression fails to compile.
+		DLL_PUBLIC Lexer::PatternPtr regex(const Glib::ustring &, GRegexCompileFlags compile = static_cast<GRegexCompileFlags>(0), GRegexMatchFlags match = static_cast<GRegexMatchFlags>(0));
+	}
+}
+
+#endif
+
diff --git a/libadhocutil/lexer.cpp b/libadhocutil/lexer.cpp
new file mode 100644
index 0000000..95e079b
--- /dev/null
+++ b/libadhocutil/lexer.cpp
@@ -0,0 +1,97 @@
+#include "lexer.h"
+#include <stdexcept>
+
+namespace AdHoc {
+	const Lexer::State Lexer::InitialState = "";
+
+	Lexer::Lexer()
+	{
+	}
+
+	Lexer::Lexer(const Rules & r) : rules(r)
+	{
+	}
+
+	void
+	Lexer::extract(const gchar * string, size_t length) const
+	{
+		ExecuteState es;
+		while (es.position < length) {
+			// The first rule that is enabled in the current state and whose
+			// pattern matches at the current position wins.
+			const Rule * selected = nullptr;
+			for (const auto & r : rules) {
+				const auto & s = boost::get<0>(r);
+				if (s.find(es.getState()) == s.end()) {
+					continue;
+				}
+				const auto & p = boost::get<1>(r);
+				if (p->matches(string, length, es.position)) {
+					selected = &r;
+					break;
+				}
+			}
+			if (!selected) {
+				// The input is length-delimited and need not be NUL-terminated,
+				// so bound the excerpt explicitly rather than reading a C string.
+				throw std::runtime_error("Unexpected input in state (" + es.getState() + ") at "
+						+ std::string(string + es.position, length - es.position));
+			}
+			es.pattern = boost::get<1>(*selected);
+			const auto & h = boost::get<2>(*selected);
+			h(&es);
+			// NOTE: a zero-length match leaves position unchanged, so termination
+			// then depends on the handler changing the state.
+			es.position += es.pattern->matchedLength();
+		}
+	}
+
+	Lexer::ExecuteState::ExecuteState() :
+		position(0)
+	{
+		stateStack.push_back(InitialState);
+	}
+
+	void
+	Lexer::ExecuteState::setState(const State & s)
+	{
+		if (stateStack.empty()) {
+			throw std::runtime_error("Lexer state stack is empty");
+		}
+		stateStack.back() = s;
+	}
+
+	void
+	Lexer::ExecuteState::pushState(const State & s)
+	{
+		stateStack.push_back(s);
+	}
+
+	void
+	Lexer::ExecuteState::popState()
+	{
+		// pop_back() on an empty vector is undefined behaviour.
+		if (stateStack.empty()) {
+			throw std::runtime_error("Lexer state stack is empty");
+		}
+		stateStack.pop_back();
+	}
+
+	const Lexer::State &
+	Lexer::ExecuteState::getState() const
+	{
+		// A handler may have popped the initial state; back() would then be
+		// undefined behaviour.
+		if (stateStack.empty()) {
+			throw std::runtime_error("Lexer state stack is empty");
+		}
+		return stateStack.back();
+	}
+
+	size_t
+	Lexer::ExecuteState::depth() const
+	{
+		return stateStack.size();
+	}
+}
+
diff --git a/libadhocutil/lexer.h b/libadhocutil/lexer.h
new file mode 100644
index 0000000..fdc4507
--- /dev/null
+++ b/libadhocutil/lexer.h
@@ -0,0 +1,80 @@
+#ifndef ADHOCUTIL_LEXER_H
+#define ADHOCUTIL_LEXER_H
+
+#include <string>
+#include <vector>
+#include <glibmm/ustring.h>
+#include <set>
+#include <boost/tuple/tuple.hpp>
+#include <boost/function.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/optional.hpp>
+#include "visibility.h"
+
+namespace AdHoc {
+	/// Rule-driven lexer: each rule pairs a set of states and a pattern with a
+	/// handler that is invoked whenever the pattern matches in one of those states.
+	class DLL_PUBLIC Lexer {
+		public:
+			/// Matcher interface. matchedLength() and match() report on the most
+			/// recent matches() call.
+			class Pattern {
+				public:
+					virtual ~Pattern() = default;
+
+					/// Tests for a match in (string, length) starting at offset position.
+					virtual bool matches(const gchar *, size_t, size_t) const = 0;
+					/// Number of input bytes consumed by the last match.
+					virtual size_t matchedLength() const = 0;
+					/// Captured group n of the last match, if any.
+					virtual boost::optional<Glib::ustring> match(int) const = 0;
+			};
+			typedef boost::shared_ptr<Pattern> PatternPtr;
+
+			typedef std::string State;
+			typedef std::set<State> States;
+
+			/// Mutable state of one extract() run, handed to every handler.
+			class ExecuteState {
+				public:
+					/// Starts at position 0 with InitialState on the state stack.
+					ExecuteState();
+
+					void pushState(const State &);
+					/// @throws std::runtime_error if the state stack is empty.
+					void popState();
+					/// Replaces the top of the state stack.
+					/// @throws std::runtime_error if the state stack is empty.
+					void setState(const State &);
+					/// @throws std::runtime_error if the state stack is empty.
+					const State & getState() const;
+					size_t depth() const;
+
+					/// Offset into the input of the text currently being matched.
+					size_t position;
+					/// Pattern of the rule whose handler is running.
+					PatternPtr pattern;
+
+				private:
+					std::vector<State> stateStack;
+			};
+
+			typedef boost::function<void(ExecuteState *)> Handler;
+			typedef boost::tuple<States, PatternPtr, Handler> Rule;
+			typedef std::vector<Rule> Rules;
+
+			static const State InitialState;
+			Lexer();
+			Lexer(const Rules &);
+
+			Rules rules;
+
+			/// Runs the rules over the first length bytes of string, calling the
+			/// handler of the first matching rule at each position.
+			/// @throws std::runtime_error if no rule matches at some position.
+			void extract(const gchar * string, size_t length) const;
+	};
+}
+
+#endif
+
diff --git a/libadhocutil/unittests/Jamfile.jam b/libadhocutil/unittests/Jamfile.jam
index 36250dc..b27f1d6 100644
--- a/libadhocutil/unittests/Jamfile.jam
+++ b/libadhocutil/unittests/Jamfile.jam
@@ -249,3 +249,14 @@ run
 	<library>boost_filesystem
 	;
 
+run
+	testLexer.cpp
+	: : :
+	<define>BOOST_TEST_DYN_LINK
+	<library>..//adhocutil
+	<library>boost_utf
+	<define>ROOT=\"$(me)\"
+	<library>boost_system
+	<library>boost_filesystem
+	;
+
diff --git a/libadhocutil/unittests/testLexer.cpp b/libadhocutil/unittests/testLexer.cpp
new file mode 100644
index 0000000..c0b973b
--- /dev/null
+++ b/libadhocutil/unittests/testLexer.cpp
@@ -0,0 +1,121 @@
+#define BOOST_TEST_MODULE Lexer
+#include <boost/test/unit_test.hpp>
+
+#include <stdexcept>
+#include <string>
+#include <lexer.h>
+#include <lexer-regex.h>
+
+using namespace AdHoc;
+using namespace AdHoc::LexerMatchers;
+
+BOOST_AUTO_TEST_CASE( defaultConstructor )
+{
+	AdHoc::Lexer l;
+	l.rules.push_back({ { AdHoc::Lexer::InitialState }, regex("a"), [](auto) { } });
+}
+
+BOOST_AUTO_TEST_CASE( simple )
+{
+	int m = 0;
+	AdHoc::Lexer l({
+		{ { AdHoc::Lexer::InitialState }, regex("a"), [&](auto) { m += 1; } }
+	});
+	BOOST_REQUIRE_EQUAL(0, m);
+	l.extract("aaaa", 4);
+	BOOST_REQUIRE_EQUAL(4, m);
+	BOOST_REQUIRE_THROW({
+		l.extract("abcd", 4);
+	}, std::runtime_error);
+}
+
+BOOST_AUTO_TEST_CASE( state )
+{
+	int m = 0;
+	std::string s;
+	AdHoc::Lexer l({
+		{ { AdHoc::Lexer::InitialState }, regex("a"), [&](auto es)
+			{
+				m += 1;
+				BOOST_REQUIRE_EQUAL(1, es->depth());
+				es->pushState("2");
+				BOOST_REQUIRE_EQUAL(2, es->depth());
+			} },
+		{ { "2" }, regex("a"), [&](auto es)
+			{
+				m += 2;
+				BOOST_REQUIRE_EQUAL("2", es->getState());
+				BOOST_REQUIRE_EQUAL(2, es->depth());
+				es->pushState("3");
+				BOOST_REQUIRE_EQUAL("3", es->getState());
+				BOOST_REQUIRE_EQUAL(3, es->depth());
+			} },
+		{ { "3" }, regex("a"), [&](auto es)
+			{
+				m += 3;
+				s += *es->pattern->match(0);
+				BOOST_REQUIRE_EQUAL(3, es->depth());
+				es->setState("4");
+				BOOST_REQUIRE_EQUAL(3, es->depth());
+				BOOST_REQUIRE_EQUAL("4", es->getState());
+				BOOST_REQUIRE_EQUAL(3, es->depth());
+				BOOST_REQUIRE(!es->pattern->match(1));
+				BOOST_REQUIRE(!es->pattern->match(2));
+				es->popState();
+				BOOST_REQUIRE_EQUAL(2, es->depth());
+				BOOST_REQUIRE_EQUAL("2", es->getState());
+				es->pushState("3");
+				BOOST_REQUIRE_EQUAL(3, es->depth());
+				BOOST_REQUIRE_EQUAL("3", es->getState());
+			} }
+	});
+	BOOST_REQUIRE_EQUAL(0, m);
+	l.extract("aaaa", 4);
+	BOOST_REQUIRE_EQUAL(9, m);
+	BOOST_REQUIRE_EQUAL("aa", s);
+}
+
+BOOST_AUTO_TEST_CASE( badre )
+{
+	BOOST_REQUIRE_THROW(regex("["), std::runtime_error);
+}
+
+BOOST_AUTO_TEST_CASE( boundedErrorExcerpt )
+{
+	AdHoc::Lexer l({
+		{ { AdHoc::Lexer::InitialState }, regex("a"), [](auto) { } }
+	});
+	// Only the first two bytes are input; the message must not read past them.
+	try {
+		l.extract("abcd", 2);
+		BOOST_FAIL("expected std::runtime_error");
+	}
+	catch (const std::runtime_error & e) {
+		BOOST_REQUIRE_EQUAL("Unexpected input in state () at b", std::string(e.what()));
+	}
+}
+
+BOOST_AUTO_TEST_CASE( multibyteCapture )
+{
+	std::string s;
+	AdHoc::Lexer l({
+		{ { AdHoc::Lexer::InitialState }, regex("\xc3\xa9"), [&](auto es) { s += *es->pattern->match(0); } }
+	});
+	// Each match is one two-byte character; captures must cover exactly those bytes.
+	l.extract("\xc3\xa9\xc3\xa9", 4);
+	BOOST_REQUIRE_EQUAL("\xc3\xa9\xc3\xa9", s);
+}
+
+BOOST_AUTO_TEST_CASE( popUnderflow )
+{
+	AdHoc::Lexer l({
+		{ { AdHoc::Lexer::InitialState }, regex("a"), [](auto es)
+			{
+				es->popState();
+				BOOST_REQUIRE_EQUAL(0, es->depth());
+				es->popState();
+			} }
+	});
+	BOOST_REQUIRE_THROW(l.extract("a", 1), std::runtime_error);
+}
+
