belr.hh 7.86 KB
Newer Older
Simon Morlat's avatar
Simon Morlat committed
1 2 3
#ifndef belr_hh
#define belr_hh

Guillaume BIENKOWSKI's avatar
Guillaume BIENKOWSKI committed
4
#include <string>
Simon Morlat's avatar
Simon Morlat committed
5 6 7 8 9 10
#include <list>
#include <map>
#include <memory>

using namespace ::std;

11 12 13 14 15 16 17 18 19 20
/*
 * BELR_PUBLIC decorates the declarations that are part of the exported API.
 * With MSVC it expands to nothing for static builds (BELR_STATIC), to
 * __declspec(dllexport) while building the library (BELR_EXPORTS), and to
 * __declspec(dllimport) for client code. With any other compiler it is empty.
 */
#ifdef _MSC_VER
	#ifdef BELR_STATIC
		#define BELR_PUBLIC
	#else
		#ifdef BELR_EXPORTS
			#define BELR_PUBLIC	__declspec(dllexport)
		#else
			#define BELR_PUBLIC	__declspec(dllimport)
		#endif
	#endif
#else
	#define BELR_PUBLIC
#endif

Simon Morlat's avatar
Simon Morlat committed
25
namespace belr{
26
	
27
/**
 * Exported helper taking a string by const reference and returning a new string by value.
 * NOTE(review): presumably returns a lower-cased copy of str; the definition is not in this header - confirm.
**/
BELR_PUBLIC string tolower(const string &str);
Simon Morlat's avatar
Simon Morlat committed
28

29
/*Forward declaration: this header only refers to ParserContextBase through shared_ptr references, so the full definition is not needed here.*/
class ParserContextBase;
Simon Morlat's avatar
Simon Morlat committed
30

31 32 33
/**
 * Table of 256 boolean flags, combined with other tables through intersect() and merge().
 * NOTE(review): presumably one flag per possible value of an input byte, telling whether a
 * recognizer may start matching on that byte - confirm against the implementation.
**/
struct TransitionMap{
	TransitionMap();
	/*Single-argument form; the meaning of the returned bool is not visible in this header.*/
	bool intersect(const TransitionMap *other);
	bool intersect(const TransitionMap *other, TransitionMap *result); //performs an AND operation
	void merge(const TransitionMap *other); //performs an OR operation
	bool mPossibleChars[256];
};

Simon Morlat's avatar
Simon Morlat committed
39
class Recognizer : public enable_shared_from_this<Recognizer>{
Simon Morlat's avatar
Simon Morlat committed
40 41
public:
	void setName(const string &name);
Simon Morlat's avatar
Simon Morlat committed
42
	const string &getName()const;
43
	BELR_PUBLIC size_t feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
44 45 46
	unsigned int getId()const{
		return mId;
	}
47 48 49
	bool getTransitionMap(TransitionMap *mask);
	void optimize();
	void optimize(int recursionLevel);
50
	virtual ~Recognizer() { }
Simon Morlat's avatar
Simon Morlat committed
51
protected:
52 53 54
	/*returns true if the transition map is complete, false otherwise*/
	virtual bool _getTransitionMap(TransitionMap *mask);
	virtual void _optimize(int recursionLevel)=0;
Simon Morlat's avatar
Simon Morlat committed
55
	Recognizer();
56
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos)=0;
Simon Morlat's avatar
Simon Morlat committed
57
	string mName;
Simon Morlat's avatar
Simon Morlat committed
58
	unsigned int mId;
Simon Morlat's avatar
Simon Morlat committed
59 60 61 62
};

class CharRecognizer : public Recognizer{
public:
63
	CharRecognizer(int to_recognize, bool caseSensitive=false);
Simon Morlat's avatar
Simon Morlat committed
64
private:
65
	virtual void _optimize(int recursionLevel);
66
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
67 68
	int mToRecognize;
	bool mCaseSensitive;
Simon Morlat's avatar
Simon Morlat committed
69 70
};

Simon Morlat's avatar
Simon Morlat committed
71
class Selector : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
72 73 74
public:
	Selector();
	shared_ptr<Selector> addRecognizer(const shared_ptr<Recognizer> &element);
75
protected:
76
	virtual void _optimize(int recursionLevel);
77
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
78 79
	size_t _feedExclusive(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	virtual bool _getTransitionMap(TransitionMap *mask);
Simon Morlat's avatar
Simon Morlat committed
80
	list<shared_ptr<Recognizer>> mElements;
81
	bool mIsExclusive;
Simon Morlat's avatar
Simon Morlat committed
82 83
};

84 85 86 87 88
/**This is an optimization of the first one for the case where there can be only a single match*/
class ExclusiveSelector : public Selector{
public:
	ExclusiveSelector();
private:
89
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
90 91
};

Simon Morlat's avatar
Simon Morlat committed
92
class Sequence : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
93 94 95
public:
	Sequence();
	shared_ptr<Sequence> addRecognizer(const shared_ptr<Recognizer> &element);
96 97 98
	virtual bool _getTransitionMap(TransitionMap *mask);
protected:
	virtual void _optimize(int recursionLevel);
Simon Morlat's avatar
Simon Morlat committed
99
private:
100
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
101 102 103
	list<shared_ptr<Recognizer>> mElements;
};

Simon Morlat's avatar
Simon Morlat committed
104
class Loop : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
105 106 107
public:
	Loop();
	shared_ptr<Loop> setRecognizer(const shared_ptr<Recognizer> &element, int min=0, int max=-1);
108 109 110
	virtual bool _getTransitionMap(TransitionMap *mask);
protected:
	virtual void _optimize(int recursionLevel);
Simon Morlat's avatar
Simon Morlat committed
111
private:
112
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
113 114 115 116 117 118 119
	shared_ptr<Recognizer> mRecognizer;
	int mMin, mMax;
};


class Foundation{
public:
120
	static shared_ptr<CharRecognizer> charRecognizer(int character, bool caseSensitive=false);
121
	static shared_ptr<Selector> selector(bool isExclusive=false);
Simon Morlat's avatar
Simon Morlat committed
122 123 124 125
	static shared_ptr<Sequence> sequence();
	static shared_ptr<Loop> loop();
};

126 127 128 129 130
/*this is an optimization of a selector with multiple individual char recognizer*/
class CharRange : public Recognizer{
public:
	CharRange(int begin, int end);
private:
131
	virtual void _optimize(int recursionLevel);
132 133 134 135 136 137 138
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	int mBegin,mEnd;
};

class Literal : public Recognizer{
public:
	Literal(const string &lit);
139
	virtual bool _getTransitionMap(TransitionMap *mask);
140
private:
141
	virtual void _optimize(int recursionLevel);
142 143 144 145 146
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	string mLiteral;
	size_t mLiteralSize;
};

Simon Morlat's avatar
Simon Morlat committed
147 148 149 150 151 152 153 154 155 156 157 158
class Utils{
public:
	static shared_ptr<Recognizer> literal(const string & lt);
	static shared_ptr<Recognizer> char_range(int begin, int end);
};

class RecognizerPointer :  public Recognizer{
public:
	RecognizerPointer();
	shared_ptr<Recognizer> getPointed();
	void setPointed(const shared_ptr<Recognizer> &r);
private:
159
	virtual void _optimize(int recursionLevel);
160
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
161 162 163
	shared_ptr<Recognizer> mRecognizer;
};

164 165 166
/**
 * Grammar class represents an ABNF grammar, with all its rules.
**/
Simon Morlat's avatar
Simon Morlat committed
167 168
class Grammar{
public:
169 170 171
	/**
	 * Initialize an empty grammar, giving a name for debugging.
	**/
172
	BELR_PUBLIC Grammar(const string &name);
173
	
174
	BELR_PUBLIC ~Grammar();
175
	
176 177 178
	/**
	 * Include another grammar into this grammar.
	**/
179
	BELR_PUBLIC void include(const shared_ptr<Grammar>& grammar);
180 181 182 183 184 185
	/**
	 * Add arule to the grammar.
	 * @param name the name of the rule
	 * @param rule the rule recognier, must be an instance of belr::Recognizer.
	 * @return the rule (the recognizer). The recognizer is given the name of the rule.
	 * @note The grammar takes ownership of the recognizer, which must not be used outside of this grammar.
Simon Morlat's avatar
Simon Morlat committed
186 187
	 * TODO: use unique_ptr to enforce this, or make a copy ?
	**/
Simon Morlat's avatar
Simon Morlat committed
188 189 190 191 192
	template <typename _recognizerT>
	shared_ptr<_recognizerT> addRule(const string & name, const shared_ptr<_recognizerT> &rule){
		assignRule(name, rule);
		return rule;
	}
193 194 195 196 197 198 199
	/**
	 * Extend a rule from the grammar.
	 * This corresponds to the '/=' operator of ABNF definition.
	 * @param name the name of the rule to extend.
	 * @param rule the recognizer of the extension.
	 * @return the rule.
	**/
200 201 202 203 204
	template <typename _recognizerT>
	shared_ptr<_recognizerT> extendRule(const string & name, const shared_ptr<_recognizerT> &rule){
		_extendRule(name, rule);
		return rule;
	}
205 206 207 208 209
	/**
	 * Find a rule from the grammar, given its name.
	 * @param name the name of the rule
	 * @return the recognizer implementing this rule. Is NULL if the rule doesn't exist in the grammar.
	**/
210
	BELR_PUBLIC shared_ptr<Recognizer> findRule(const string &name);
211 212 213 214 215 216 217
	/**
	 * Find a rule from the grammar, given its name.
	 * Unlike findRule(), getRule() never returns NULL. 
	 * If the rule is not (yet) defined, it returns an undefined pointer, that will be set later if the rule gets defined.
	 * This mechanism is required to allow defining rules in any order, and defining rules that call themselve recursively.
	 * @param name the name of the rule to get
	 * @return the recognizer implementing the rule, or a RecognizerPointer if the rule isn't yet defined.
218
	**/
219
	BELR_PUBLIC shared_ptr<Recognizer> getRule(const string &name);
220 221 222 223
	/**
	 * Returns true if the grammar is complete, that is all rules are defined.
	 * In other words, a grammar is complete if no rule depends on another rule which is not defined.
	**/
224
	BELR_PUBLIC bool isComplete()const;
225 226 227 228 229 230
	/**
	 * Optimize the grammar. This is required to obtain good performance of the recognizers implementing the rule.
	 * The optimization step consists in checking whether belr::Selector objects in the grammar are exclusive or not.
	 * A selector is said exclusive when a single sub-rule can match. Knowing this in advance optimizes the processing because no branch
	 * context is to be created to explore the different choices of the selector recognizer.
	**/ 
231
	void optimize();
232 233 234
	/**
	 * Return the number of rules in this grammar.
	**/
235
	int getNumRules()const;
Simon Morlat's avatar
Simon Morlat committed
236 237
private:
	void assignRule(const string &name, const shared_ptr<Recognizer> &rule);
238
	void _extendRule(const string &name, const shared_ptr<Recognizer> &rule);
Simon Morlat's avatar
Simon Morlat committed
239
	map<string,shared_ptr<Recognizer>> mRules;
240
	list<shared_ptr<RecognizerPointer>> mRecognizerPointers;
Simon Morlat's avatar
Simon Morlat committed
241 242 243
	string mName;
};

Simon Morlat's avatar
Simon Morlat committed
244 245 246



Simon Morlat's avatar
Simon Morlat committed
247
}
Simon Morlat's avatar
Simon Morlat committed
248 249

#endif