// belr.hh
#ifndef belr_hh
#define belr_hh

#include <string>
#include <list>
#include <map>
#include <memory>

// NOTE(review): a using-directive at header scope exposes every std name to each
// file that includes this header. The declarations below rely on it for the
// unqualified string, list, map, shared_ptr and enable_shared_from_this names,
// so it cannot be dropped without qualifying those names first.
using namespace ::std;

namespace belr{
/* NOTE(review): presumably returns a lower-cased copy of 'str'; the definition is not part of this header - confirm. */
string tolower(const string &str);
/* Forward declaration: this header only refers to ParserContextBase through shared_ptr references. */
class ParserContextBase;
/**
 * Table of 256 boolean flags (mPossibleChars) together with helpers to combine two tables.
 * NOTE(review): the flags are presumably indexed by character code - confirm against the implementation.
 * Only declarations live here; the member functions are defined outside this header.
**/
struct TransitionMap{
	TransitionMap();
	/* NOTE(review): single-argument form; presumably reports whether this map and 'other' overlap without storing the result - confirm. */
	bool intersect(const TransitionMap *other);
	bool intersect(const TransitionMap *other, TransitionMap *result); //performs a AND operation
	void merge(const TransitionMap *other); //Performs an OR operation
	bool mPossibleChars[256];
};

/**
 * Abstract base class of the recognizers declared in this header.
 * It derives from enable_shared_from_this, so instances are expected to be owned by shared_ptr.
 * Subclasses must implement _feed() and _optimize(); _getTransitionMap() may be overridden.
 * The constructor and destructor are protected: only subclasses can create or destroy instances directly.
**/
class Recognizer : public enable_shared_from_this<Recognizer>{
public:
	void setName(const string &name);
	const string &getName()const;
	/*
	 * NOTE(review): presumably tries to match 'input' starting at offset 'pos' using the parser context 'ctx';
	 * the meaning of the returned size_t is defined by the implementation file - confirm.
	 */
	size_t feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	/*returns the value of mId*/
	unsigned int getId()const{
		return mId;
	}
	bool getTransitionMap(TransitionMap *mask);
	void optimize();
	void optimize(int recursionLevel);
protected:
	/*returns true if the transition map is complete, false otherwise*/
	virtual bool _getTransitionMap(TransitionMap *mask);
	virtual void _optimize(int recursionLevel)=0;
	Recognizer();
	virtual ~Recognizer() { }
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos)=0;
	string mName;
	unsigned int mId;
};

class CharRecognizer : public Recognizer{
public:
49
	CharRecognizer(int to_recognize, bool caseSensitive=false);
Simon Morlat's avatar
Simon Morlat committed
50
private:
51
	virtual void _optimize(int recursionLevel);
52
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
53 54
	int mToRecognize;
	bool mCaseSensitive;
Simon Morlat's avatar
Simon Morlat committed
55 56
};

class Selector : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
58 59 60
public:
	Selector();
	shared_ptr<Selector> addRecognizer(const shared_ptr<Recognizer> &element);
61
protected:
62
	virtual void _optimize(int recursionLevel);
63
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
64 65
	size_t _feedExclusive(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	virtual bool _getTransitionMap(TransitionMap *mask);
Simon Morlat's avatar
Simon Morlat committed
66
	list<shared_ptr<Recognizer>> mElements;
67
	bool mIsExclusive;
Simon Morlat's avatar
Simon Morlat committed
68 69
};

/**This is an optimization of the first one for the case where there can be only a single match*/
class ExclusiveSelector : public Selector{
public:
	ExclusiveSelector();
private:
75
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
76 77
};

class Sequence : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
79 80 81
public:
	Sequence();
	shared_ptr<Sequence> addRecognizer(const shared_ptr<Recognizer> &element);
82 83 84
	virtual bool _getTransitionMap(TransitionMap *mask);
protected:
	virtual void _optimize(int recursionLevel);
Simon Morlat's avatar
Simon Morlat committed
85
private:
86
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
87 88 89
	list<shared_ptr<Recognizer>> mElements;
};

class Loop : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
91 92 93
public:
	Loop();
	shared_ptr<Loop> setRecognizer(const shared_ptr<Recognizer> &element, int min=0, int max=-1);
94 95 96
	virtual bool _getTransitionMap(TransitionMap *mask);
protected:
	virtual void _optimize(int recursionLevel);
Simon Morlat's avatar
Simon Morlat committed
97
private:
98
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
99 100 101 102 103 104 105
	shared_ptr<Recognizer> mRecognizer;
	int mMin, mMax;
};


/**
 * Static factory helpers returning shared_ptr instances of the basic recognizer types
 * (CharRecognizer, Selector, Sequence, Loop).
**/
class Foundation{
public:
	static shared_ptr<CharRecognizer> charRecognizer(int character, bool caseSensitive=false);
	/* NOTE(review): isExclusive presumably selects an ExclusiveSelector instead of a plain Selector - confirm. */
	static shared_ptr<Selector> selector(bool isExclusive=false);
	static shared_ptr<Sequence> sequence();
	static shared_ptr<Loop> loop();
};

/*this is an optimization of a selector with multiple individual char recognizer*/
class CharRange : public Recognizer{
public:
	CharRange(int begin, int end);
private:
117
	virtual void _optimize(int recursionLevel);
118 119 120 121 122 123 124
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	int mBegin,mEnd;
};

class Literal : public Recognizer{
public:
	Literal(const string &lit);
125
	virtual bool _getTransitionMap(TransitionMap *mask);
126
private:
127
	virtual void _optimize(int recursionLevel);
128 129 130 131 132
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	string mLiteral;
	size_t mLiteralSize;
};

/**
 * Static helpers that build recognizers and return them as generic Recognizer pointers.
**/
class Utils{
public:
	/* Builds a recognizer from the two bounds 'begin' and 'end'. NOTE(review): presumably a CharRange - confirm. */
	static shared_ptr<Recognizer> char_range(int begin, int end);
	/* Builds a recognizer from the string 'lt'. NOTE(review): presumably a Literal - confirm. */
	static shared_ptr<Recognizer> literal(const string &lt);
};

class RecognizerPointer :  public Recognizer{
public:
	RecognizerPointer();
	shared_ptr<Recognizer> getPointed();
	void setPointed(const shared_ptr<Recognizer> &r);
private:
145
	virtual void _optimize(int recursionLevel);
146
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
147 148 149
	shared_ptr<Recognizer> mRecognizer;
};

/**
 * Grammar class represents an ABNF grammar, with all its rules.
**/
Simon Morlat's avatar
Simon Morlat committed
153 154
class Grammar{
public:
155 156 157
	/**
	 * Initialize an empty grammar, giving a name for debugging.
	**/
Simon Morlat's avatar
Simon Morlat committed
158
	Grammar(const string &name);
159 160 161
	
	virtual ~Grammar() { }
	
162 163 164
	/**
	 * Include another grammar into this grammar.
	**/
Simon Morlat's avatar
Simon Morlat committed
165
	void include(const shared_ptr<Grammar>& grammar);
166 167 168 169 170 171
	/**
	 * Add arule to the grammar.
	 * @param name the name of the rule
	 * @param rule the rule recognier, must be an instance of belr::Recognizer.
	 * @return the rule (the recognizer). The recognizer is given the name of the rule.
	 * @note The grammar takes ownership of the recognizer, which must not be used outside of this grammar.
Simon Morlat's avatar
Simon Morlat committed
172 173
	 * TODO: use unique_ptr to enforce this, or make a copy ?
	**/
Simon Morlat's avatar
Simon Morlat committed
174 175 176 177 178
	template <typename _recognizerT>
	shared_ptr<_recognizerT> addRule(const string & name, const shared_ptr<_recognizerT> &rule){
		assignRule(name, rule);
		return rule;
	}
179 180 181 182 183 184 185
	/**
	 * Extend a rule from the grammar.
	 * This corresponds to the '/=' operator of ABNF definition.
	 * @param name the name of the rule to extend.
	 * @param rule the recognizer of the extension.
	 * @return the rule.
	**/
186 187 188 189 190
	template <typename _recognizerT>
	shared_ptr<_recognizerT> extendRule(const string & name, const shared_ptr<_recognizerT> &rule){
		_extendRule(name, rule);
		return rule;
	}
191 192 193 194 195
	/**
	 * Find a rule from the grammar, given its name.
	 * @param name the name of the rule
	 * @return the recognizer implementing this rule. Is NULL if the rule doesn't exist in the grammar.
	**/
196
	shared_ptr<Recognizer> findRule(const string &name);
197 198 199 200 201 202 203
	/**
	 * Find a rule from the grammar, given its name.
	 * Unlike findRule(), getRule() never returns NULL. 
	 * If the rule is not (yet) defined, it returns an undefined pointer, that will be set later if the rule gets defined.
	 * This mechanism is required to allow defining rules in any order, and defining rules that call themselve recursively.
	 * @param name the name of the rule to get
	 * @return the recognizer implementing the rule, or a RecognizerPointer if the rule isn't yet defined.
204
	**/
Simon Morlat's avatar
Simon Morlat committed
205
	shared_ptr<Recognizer> getRule(const string &name);
206 207 208 209
	/**
	 * Returns true if the grammar is complete, that is all rules are defined.
	 * In other words, a grammar is complete if no rule depends on another rule which is not defined.
	**/
Simon Morlat's avatar
Simon Morlat committed
210
	bool isComplete()const;
211 212 213 214 215 216
	/**
	 * Optimize the grammar. This is required to obtain good performance of the recognizers implementing the rule.
	 * The optimization step consists in checking whether belr::Selector objects in the grammar are exclusive or not.
	 * A selector is said exclusive when a single sub-rule can match. Knowing this in advance optimizes the processing because no branch
	 * context is to be created to explore the different choices of the selector recognizer.
	**/ 
217
	void optimize();
218 219 220
	/**
	 * Return the number of rules in this grammar.
	**/
221
	int getNumRules()const;
Simon Morlat's avatar
Simon Morlat committed
222 223
private:
	void assignRule(const string &name, const shared_ptr<Recognizer> &rule);
224
	void _extendRule(const string &name, const shared_ptr<Recognizer> &rule);
Simon Morlat's avatar
Simon Morlat committed
225 226 227 228
	map<string,shared_ptr<Recognizer>> mRules;
	string mName;
};

}

#endif