belr.hh 7.73 KB
Newer Older
Simon Morlat's avatar
Simon Morlat committed
1 2 3
#ifndef belr_hh
#define belr_hh

Guillaume BIENKOWSKI's avatar
Guillaume BIENKOWSKI committed
4
#include <string>
Simon Morlat's avatar
Simon Morlat committed
5 6 7 8 9 10
#include <list>
#include <map>
#include <memory>

using namespace ::std;

11 12 13 14 15 16
#if defined(_MSC_VER)
#define BELR_PUBLIC	__declspec(dllexport)
#else
#define BELR_PUBLIC
#endif

Simon Morlat's avatar
Simon Morlat committed
17
namespace belr{
18
	
19
/*
 * belr's own string-level tolower(): takes a string by const reference and returns a string by value.
 * The definition is not in this header.
 * NOTE(review): presumably returns a lower-cased copy of str, leaving the argument untouched - confirm against the implementation.
 */
BELR_PUBLIC string tolower(const string &str);
Simon Morlat's avatar
Simon Morlat committed
20

21
/*
 * Forward declaration. In the part of this header visible here, ParserContextBase is only
 * referred to through shared_ptr<ParserContextBase> parameters, so an incomplete type is sufficient.
 */
class ParserContextBase;
Simon Morlat's avatar
Simon Morlat committed
22

23 24 25
/**
 * A table of 256 boolean flags, i.e. one flag per possible value of an 8-bit character.
 * NOTE(review): presumably mPossibleChars[c] is true when character c is an acceptable
 * next character - confirm against the implementation.
 * All member functions are defined outside of this header.
**/
struct TransitionMap{
	TransitionMap();
	/**
	 * Single-argument form of intersect().
	 * NOTE(review): presumably only tests whether the two maps have a character in common,
	 * without modifying either of them - confirm against the implementation.
	**/
	bool intersect(const TransitionMap *other);
	/**
	 * Performs an AND operation.
	 * NOTE(review): the outcome is presumably written to 'result' - confirm against the implementation.
	**/
	bool intersect(const TransitionMap *other, TransitionMap *result);
	/**
	 * Performs an OR operation.
	**/
	void merge(const TransitionMap *other);
	/** One flag per 8-bit character value. */
	bool mPossibleChars[256];
};

Simon Morlat's avatar
Simon Morlat committed
31
class Recognizer : public enable_shared_from_this<Recognizer>{
Simon Morlat's avatar
Simon Morlat committed
32 33
public:
	void setName(const string &name);
Simon Morlat's avatar
Simon Morlat committed
34
	const string &getName()const;
35
	BELR_PUBLIC size_t feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
36 37 38
	unsigned int getId()const{
		return mId;
	}
39 40 41
	bool getTransitionMap(TransitionMap *mask);
	void optimize();
	void optimize(int recursionLevel);
42
	virtual ~Recognizer() { }
Simon Morlat's avatar
Simon Morlat committed
43
protected:
44 45 46
	/*returns true if the transition map is complete, false otherwise*/
	virtual bool _getTransitionMap(TransitionMap *mask);
	virtual void _optimize(int recursionLevel)=0;
Simon Morlat's avatar
Simon Morlat committed
47
	Recognizer();
48
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos)=0;
Simon Morlat's avatar
Simon Morlat committed
49
	string mName;
Simon Morlat's avatar
Simon Morlat committed
50
	unsigned int mId;
Simon Morlat's avatar
Simon Morlat committed
51 52 53 54
};

class CharRecognizer : public Recognizer{
public:
55
	CharRecognizer(int to_recognize, bool caseSensitive=false);
Simon Morlat's avatar
Simon Morlat committed
56
private:
57
	virtual void _optimize(int recursionLevel);
58
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
59 60
	int mToRecognize;
	bool mCaseSensitive;
Simon Morlat's avatar
Simon Morlat committed
61 62
};

Simon Morlat's avatar
Simon Morlat committed
63
class Selector : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
64 65 66
public:
	Selector();
	shared_ptr<Selector> addRecognizer(const shared_ptr<Recognizer> &element);
67
protected:
68
	virtual void _optimize(int recursionLevel);
69
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
70 71
	size_t _feedExclusive(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	virtual bool _getTransitionMap(TransitionMap *mask);
Simon Morlat's avatar
Simon Morlat committed
72
	list<shared_ptr<Recognizer>> mElements;
73
	bool mIsExclusive;
Simon Morlat's avatar
Simon Morlat committed
74 75
};

76 77 78 79 80
/**This is an optimization of the first one for the case where there can be only a single match*/
class ExclusiveSelector : public Selector{
public:
	ExclusiveSelector();
private:
81
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
82 83
};

Simon Morlat's avatar
Simon Morlat committed
84
class Sequence : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
85 86 87
public:
	Sequence();
	shared_ptr<Sequence> addRecognizer(const shared_ptr<Recognizer> &element);
88 89 90
	virtual bool _getTransitionMap(TransitionMap *mask);
protected:
	virtual void _optimize(int recursionLevel);
Simon Morlat's avatar
Simon Morlat committed
91
private:
92
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
93 94 95
	list<shared_ptr<Recognizer>> mElements;
};

Simon Morlat's avatar
Simon Morlat committed
96
class Loop : public Recognizer{
Simon Morlat's avatar
Simon Morlat committed
97 98 99
public:
	Loop();
	shared_ptr<Loop> setRecognizer(const shared_ptr<Recognizer> &element, int min=0, int max=-1);
100 101 102
	virtual bool _getTransitionMap(TransitionMap *mask);
protected:
	virtual void _optimize(int recursionLevel);
Simon Morlat's avatar
Simon Morlat committed
103
private:
104
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
105 106 107 108 109 110 111
	shared_ptr<Recognizer> mRecognizer;
	int mMin, mMax;
};


class Foundation{
public:
112
	static shared_ptr<CharRecognizer> charRecognizer(int character, bool caseSensitive=false);
113
	static shared_ptr<Selector> selector(bool isExclusive=false);
Simon Morlat's avatar
Simon Morlat committed
114 115 116 117
	static shared_ptr<Sequence> sequence();
	static shared_ptr<Loop> loop();
};

118 119 120 121 122
/*this is an optimization of a selector with multiple individual char recognizer*/
class CharRange : public Recognizer{
public:
	CharRange(int begin, int end);
private:
123
	virtual void _optimize(int recursionLevel);
124 125 126 127 128 129 130
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	int mBegin,mEnd;
};

class Literal : public Recognizer{
public:
	Literal(const string &lit);
131
	virtual bool _getTransitionMap(TransitionMap *mask);
132
private:
133
	virtual void _optimize(int recursionLevel);
134 135 136 137 138
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
	string mLiteral;
	size_t mLiteralSize;
};

Simon Morlat's avatar
Simon Morlat committed
139 140 141 142 143 144 145 146 147 148 149 150
class Utils{
public:
	static shared_ptr<Recognizer> literal(const string & lt);
	static shared_ptr<Recognizer> char_range(int begin, int end);
};

class RecognizerPointer :  public Recognizer{
public:
	RecognizerPointer();
	shared_ptr<Recognizer> getPointed();
	void setPointed(const shared_ptr<Recognizer> &r);
private:
151
	virtual void _optimize(int recursionLevel);
152
	virtual size_t _feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos);
Simon Morlat's avatar
Simon Morlat committed
153 154 155
	shared_ptr<Recognizer> mRecognizer;
};

156 157 158
/**
 * Grammar class represents an ABNF grammar, with all its rules.
**/
Simon Morlat's avatar
Simon Morlat committed
159 160
class Grammar{
public:
161 162 163
	/**
	 * Initialize an empty grammar, giving a name for debugging.
	**/
164
	BELR_PUBLIC Grammar(const string &name);
165
	
166
	BELR_PUBLIC ~Grammar();
167
	
168 169 170
	/**
	 * Include another grammar into this grammar.
	**/
171
	BELR_PUBLIC void include(const shared_ptr<Grammar>& grammar);
172 173 174 175 176 177
	/**
	 * Add arule to the grammar.
	 * @param name the name of the rule
	 * @param rule the rule recognier, must be an instance of belr::Recognizer.
	 * @return the rule (the recognizer). The recognizer is given the name of the rule.
	 * @note The grammar takes ownership of the recognizer, which must not be used outside of this grammar.
Simon Morlat's avatar
Simon Morlat committed
178 179
	 * TODO: use unique_ptr to enforce this, or make a copy ?
	**/
Simon Morlat's avatar
Simon Morlat committed
180 181 182 183 184
	template <typename _recognizerT>
	shared_ptr<_recognizerT> addRule(const string & name, const shared_ptr<_recognizerT> &rule){
		assignRule(name, rule);
		return rule;
	}
185 186 187 188 189 190 191
	/**
	 * Extend a rule from the grammar.
	 * This corresponds to the '/=' operator of ABNF definition.
	 * @param name the name of the rule to extend.
	 * @param rule the recognizer of the extension.
	 * @return the rule.
	**/
192 193 194 195 196
	template <typename _recognizerT>
	shared_ptr<_recognizerT> extendRule(const string & name, const shared_ptr<_recognizerT> &rule){
		_extendRule(name, rule);
		return rule;
	}
197 198 199 200 201
	/**
	 * Find a rule from the grammar, given its name.
	 * @param name the name of the rule
	 * @return the recognizer implementing this rule. Is NULL if the rule doesn't exist in the grammar.
	**/
202
	BELR_PUBLIC shared_ptr<Recognizer> findRule(const string &name);
203 204 205 206 207 208 209
	/**
	 * Find a rule from the grammar, given its name.
	 * Unlike findRule(), getRule() never returns NULL. 
	 * If the rule is not (yet) defined, it returns an undefined pointer, that will be set later if the rule gets defined.
	 * This mechanism is required to allow defining rules in any order, and defining rules that call themselve recursively.
	 * @param name the name of the rule to get
	 * @return the recognizer implementing the rule, or a RecognizerPointer if the rule isn't yet defined.
210
	**/
211
	BELR_PUBLIC shared_ptr<Recognizer> getRule(const string &name);
212 213 214 215
	/**
	 * Returns true if the grammar is complete, that is all rules are defined.
	 * In other words, a grammar is complete if no rule depends on another rule which is not defined.
	**/
216
	BELR_PUBLIC bool isComplete()const;
217 218 219 220 221 222
	/**
	 * Optimize the grammar. This is required to obtain good performance of the recognizers implementing the rule.
	 * The optimization step consists in checking whether belr::Selector objects in the grammar are exclusive or not.
	 * A selector is said exclusive when a single sub-rule can match. Knowing this in advance optimizes the processing because no branch
	 * context is to be created to explore the different choices of the selector recognizer.
	**/ 
223
	void optimize();
224 225 226
	/**
	 * Return the number of rules in this grammar.
	**/
227
	int getNumRules()const;
Simon Morlat's avatar
Simon Morlat committed
228 229
private:
	void assignRule(const string &name, const shared_ptr<Recognizer> &rule);
230
	void _extendRule(const string &name, const shared_ptr<Recognizer> &rule);
Simon Morlat's avatar
Simon Morlat committed
231
	map<string,shared_ptr<Recognizer>> mRules;
232
	list<shared_ptr<RecognizerPointer>> mRecognizerPointers;
Simon Morlat's avatar
Simon Morlat committed
233 234 235
	string mName;
};

Simon Morlat's avatar
Simon Morlat committed
236 237 238



Simon Morlat's avatar
Simon Morlat committed
239
}
Simon Morlat's avatar
Simon Morlat committed
240 241

#endif