diff options
| author | Dan Goodliffe <dan@randomdan.homeip.net> | 2016-05-02 19:47:29 +0100 | 
|---|---|---|
| committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2016-05-02 19:47:29 +0100 | 
| commit | 69b8f140b414712379761cc2f8ed5114b229dba3 (patch) | |
| tree | d82da11f48ef132fb467b3c8d28cdc0551d760dc | |
| parent | Process masks (diff) | |
| download | gentoobrowse-api-69b8f140b414712379761cc2f8ed5114b229dba3.tar.bz2 gentoobrowse-api-69b8f140b414712379761cc2f8ed5114b229dba3.tar.xz gentoobrowse-api-69b8f140b414712379761cc2f8ed5114b229dba3.zip | |
Add a flexible extensible lexer
| -rw-r--r-- | gentoobrowse-api/service/utils/lexer.cpp | 137 | ||||
| -rw-r--r-- | gentoobrowse-api/service/utils/lexer.h | 62 | 
2 files changed, 199 insertions, 0 deletions
| diff --git a/gentoobrowse-api/service/utils/lexer.cpp b/gentoobrowse-api/service/utils/lexer.cpp new file mode 100644 index 0000000..8ed83b8 --- /dev/null +++ b/gentoobrowse-api/service/utils/lexer.cpp @@ -0,0 +1,137 @@ +#include "lexer.h" + +namespace Gentoo { +	namespace Utils { +		const Lexer::State Lexer::InitialState = ""; + +		class Regex : public Lexer::Pattern { +			public: +				Regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match) : +					err(nullptr), +					regex(g_regex_new(pattern.c_str(), compile, match, &err)), +					info(nullptr) +				{ +					if (!regex) { +						std::runtime_error e(std::string("Failed to create GRegex: ") + err->message); +						g_error_free(err); +						throw e; +					} +				} + +				~Regex() +				{ +					if (err) { +						g_error_free(err); +					} +					if (info) { +						g_match_info_free(info); +					} +					g_regex_unref(regex); +				} + +				bool matches(const gchar * string, size_t length, size_t position) const override +				{ +					if (info) { +						g_match_info_free(info); +					} +					g_regex_match_full(regex, string, length, position, G_REGEX_MATCH_ANCHORED, &info, &err); +					if (err) { +						std::runtime_error e(std::string("Failed to execute regex: ") + err->message); +						g_error_free(err); +						throw e; +					} +					str = string; +					return g_match_info_matches(info); +				} + +				size_t matchedLength() const override +				{ +					gint start, end; +					g_match_info_fetch_pos(info, 0, &start, &end); +					return end - start; +				} + +				boost::optional<Glib::ustring> match(int n) const override +				{ +					gint start, end; +					if (g_match_info_fetch_pos(info, n, &start, &end)) { +						if (start == -1 && end == -1) { +							return boost::optional<Glib::ustring>(); +						} +						return Glib::ustring(str + start, end - start); +					} +					return boost::optional<Glib::ustring>(); +				} + +			private: +				mutable GError * err; +				GRegex * regex; +				mutable GMatchInfo * info; +				mutable const gchar * str; +		}; + +		Lexer::PatternPtr +		Lexer::regex(const Glib::ustring & pattern, GRegexCompileFlags compile, GRegexMatchFlags match) +		{ +			return PatternPtr(new Regex(pattern, compile, match)); +		} + +		void +		Lexer::extract(const gchar * string, size_t length) const +		{ +			ExecuteState es; +			while (es.position < length) { +				const Rule * selected = nullptr; +				for (const auto & r : rules) { +					const auto & s = boost::get<0>(r); +					if (s.find(es.getState()) == s.end()) { +						continue; +					} +					const auto & p = boost::get<1>(r); +					if (p->matches(string, length, es.position)) { +						selected = &r; +						break; +					} +				} +				if (!selected) { +					throw std::runtime_error(std::string("Unexpected input at ") + (string + es.position)); +				} +				es.pattern = boost::get<1>(*selected); +				const auto & h = boost::get<2>(*selected); +				h(&es); +				es.position += es.pattern->matchedLength(); +			} +			 +		} + +		Lexer::ExecuteState::ExecuteState() : +			position(0) +		{ +			stateStack.push_back(InitialState); +		} + +		void +		Lexer::ExecuteState::setState(const State & s) +		{ +			stateStack.back() = s; +		} + +		void +		Lexer::ExecuteState::pushState(const State & s) +		{ +			stateStack.push_back(s); +		} + +		void +		Lexer::ExecuteState::popState() +		{ +			stateStack.pop_back(); +		} + +		const Lexer::State & +		Lexer::ExecuteState::getState() const +		{ +			return stateStack.back(); +		} +	} +} diff --git a/gentoobrowse-api/service/utils/lexer.h b/gentoobrowse-api/service/utils/lexer.h new file mode 100644 index 0000000..44d3d57 --- /dev/null +++ b/gentoobrowse-api/service/utils/lexer.h @@ -0,0 +1,62 @@ +#ifndef GENTOOBROWSE_SERVICE_UTILS_LEXER_H +#define GENTOOBROWSE_SERVICE_UTILS_LEXER_H + +#include <vector> +#include <glibmm/ustring.h> +#include <set> +#include <boost/tuple/tuple.hpp> +#include <boost/function.hpp> +#include <boost/shared_ptr.hpp> +#include <boost/optional.hpp> + +namespace Gentoo { +	namespace Utils { +		class Lexer { +			public: +				class Pattern { +					public: +						virtual ~Pattern() = default; + +						virtual bool matches(const gchar *, size_t, size_t) const = 0; +						virtual size_t matchedLength() const = 0; +						virtual boost::optional<Glib::ustring> match(int) const = 0; +				}; +				typedef boost::shared_ptr<Pattern> PatternPtr; + +				typedef std::string State; +				typedef std::set<State> States; + +				class ExecuteState { +					public: +						ExecuteState(); + +						void pushState(const State &); +						void popState(); +						void setState(const State &); +						const State & getState() const; + +						size_t position; +						PatternPtr pattern; + +					private: +						std::vector<State> stateStack; +				}; + +				typedef boost::function<void(ExecuteState *)> Handler; +				typedef boost::tuple<States, PatternPtr, Handler> Rule; +				typedef std::vector<Rule> Rules; + +				static const State InitialState; +				Rules rules; + +				static PatternPtr regex(const Glib::ustring &, GRegexCompileFlags compile = (GRegexCompileFlags)0, GRegexMatchFlags match = (GRegexMatchFlags)0); + +			public: +				void extract(const gchar * string, size_t length) const; +		}; + +	} +} + +#endif + | 
