belr.cc 12.1 KB
Newer Older
Simon Morlat's avatar
Simon Morlat committed
1 2


3 4
#include "belr/belr.hh"
#include "belr/parser.hh"
Simon Morlat's avatar
Simon Morlat committed
5 6 7 8 9
#include <algorithm>
#include <iostream>

namespace belr{

10 11 12 13 14 15 16 17 18 19 20 21
TransitionMap::TransitionMap(){
	for(size_t i=0;i<sizeof(mPossibleChars)/sizeof(bool);++i)
		mPossibleChars[i]=false;
}

bool TransitionMap::intersect(const TransitionMap* other){
	for(size_t i=0;i<sizeof(mPossibleChars)/sizeof(bool);++i){
		if (mPossibleChars[i] && other->mPossibleChars[i]) return true;
	}
	return false;
}

22 23 24 25 26 27 28 29 30 31
bool TransitionMap::intersect(const TransitionMap *other, TransitionMap *result){
	bool ret=false;
	for(size_t i=0;i<sizeof(mPossibleChars)/sizeof(bool);++i){
		bool tmp=mPossibleChars[i] && other->mPossibleChars[i];
		result->mPossibleChars[i]=tmp;
		if (tmp) ret=true;
	}
	return ret;
}

32 33 34 35 36 37 38 39
void TransitionMap::merge(const TransitionMap* other){
	for(size_t i=0;i<sizeof(mPossibleChars)/sizeof(bool);++i){
		if (other->mPossibleChars[i]) mPossibleChars[i]=true;
	}
}



Simon Morlat's avatar
Simon Morlat committed
40 41 42 43
Recognizer::Recognizer(){
}

void Recognizer::setName(const string& name){
Simon Morlat's avatar
Simon Morlat committed
44
	static unsigned int id_base=0;
Simon Morlat's avatar
Simon Morlat committed
45
	mName=name;
Simon Morlat's avatar
Simon Morlat committed
46 47
	/* every recognizer that is given a name is given also a unique id*/
	mId=++id_base;
Simon Morlat's avatar
Simon Morlat committed
48 49
}

Simon Morlat's avatar
Simon Morlat committed
50 51 52 53
const string &Recognizer::getName()const{
	return mName;
}

54
size_t Recognizer::feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
Simon Morlat's avatar
Simon Morlat committed
55 56
	size_t match;
	
57 58
	ParserLocalContext hctx;
	if (ctx) ctx->beginParse(hctx, shared_from_this());
Simon Morlat's avatar
Simon Morlat committed
59 60
	match=_feed(ctx, input, pos);
	if (match!=string::npos && match>0){
Simon Morlat's avatar
Simon Morlat committed
61
		if (0 && mName.size()>0){
Simon Morlat's avatar
Simon Morlat committed
62 63 64
			string matched=input.substr(pos,match);
			cout<<"Matched recognizer '"<<mName<<"' with sequence '"<<matched<<"'."<<endl;
		}
Simon Morlat's avatar
Simon Morlat committed
65
	}
66
	if (ctx) ctx->endParse(hctx, input, pos, match);
Simon Morlat's avatar
Simon Morlat committed
67
	
Simon Morlat's avatar
Simon Morlat committed
68 69 70
	return match;
}

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
bool Recognizer::getTransitionMap(TransitionMap* mask){
	bool ret=_getTransitionMap(mask);
	if (0 /*!mName.empty()*/){
		cout<<"TransitionMap after "<<mName<<endl;
		for(int i=0;i<256;++i){
			if (mask->mPossibleChars[i]) cout<<(char)i;
		}
		cout<<endl;
	}
	return ret;
}


bool Recognizer::_getTransitionMap(TransitionMap* mask){
	string input;
	input.resize(2,'\0');
	for(int i=0;i<256;++i){
		input[0]=i;
		if (feed(NULL,input,0)==1)
			mask->mPossibleChars[i]=true;
	}
	return true;
}

void Recognizer::optimize(){
	optimize(0);
}

void Recognizer::optimize(int recursionLevel){
	if (recursionLevel!=0 && mId!=0) return; /*stop on rule except at level 0*/
	_optimize(++recursionLevel);
}


105 106 107 108 109 110 111
CharRecognizer::CharRecognizer(int to_recognize, bool caseSensitive) : mToRecognize(to_recognize), mCaseSensitive(caseSensitive){
	if (::tolower(to_recognize)==::toupper(to_recognize)){
		/*not a case caseSensitive character*/
		mCaseSensitive=true;/*no need to perform a case insensitive match*/
	}else if (!caseSensitive){
		mToRecognize=::tolower(mToRecognize);
	}
Simon Morlat's avatar
Simon Morlat committed
112 113
}

114
size_t CharRecognizer::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
115 116 117 118
	if (mCaseSensitive){
		return input[pos]==mToRecognize ? 1 : string::npos;
	}
	return ::tolower(input[pos])==mToRecognize ? 1 : string::npos;
Simon Morlat's avatar
Simon Morlat committed
119 120
}

121 122 123 124 125 126
void CharRecognizer::_optimize(int recursionLevel){

}


Selector::Selector() : mIsExclusive(false){
Simon Morlat's avatar
Simon Morlat committed
127 128 129 130
}

shared_ptr<Selector> Selector::addRecognizer(const shared_ptr<Recognizer> &r){
	mElements.push_back(r);
Simon Morlat's avatar
Simon Morlat committed
131
	return static_pointer_cast<Selector> (shared_from_this());
Simon Morlat's avatar
Simon Morlat committed
132 133
}

134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
bool Selector::_getTransitionMap(TransitionMap* mask){
    for (auto it=mElements.begin(); it!=mElements.end(); ++it){
	    (*it)->getTransitionMap(mask);
    }
    return true;
}

size_t Selector::_feedExclusive(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
	size_t matched=0;
	
	for (auto it=mElements.begin(); it!=mElements.end(); ++it){
		matched=(*it)->feed(ctx, input, pos);
		if (matched!=string::npos && matched>0) {
			return matched;
		}
	}
	return string::npos;
}

153
size_t Selector::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
154 155
	if (mIsExclusive) return _feedExclusive(ctx, input, pos);
	
Simon Morlat's avatar
Simon Morlat committed
156
	size_t matched=0;
157
	size_t bestmatch=0;
158
	shared_ptr<HandlerContextBase> bestBranch;
Simon Morlat's avatar
Simon Morlat committed
159 160
	
	for (auto it=mElements.begin(); it!=mElements.end(); ++it){
161 162
		shared_ptr<HandlerContextBase> br;
		if (ctx) br=ctx->branch();
163
		matched=(*it)->feed(ctx, input, pos);
164 165
		if (matched!=string::npos && matched>bestmatch) {
			bestmatch=matched;
166 167 168
			if (bestBranch) ctx->removeBranch(bestBranch);
			bestBranch=br;
		}else{
169 170
			if (ctx)
				ctx->removeBranch(br);
171
		}
Simon Morlat's avatar
Simon Morlat committed
172
	}
173
	if (bestmatch==0) return string::npos;
174
	if (ctx && bestmatch!=string::npos){
175
		ctx->merge(bestBranch);
Simon Morlat's avatar
Simon Morlat committed
176
	}
177
	return bestmatch;
Simon Morlat's avatar
Simon Morlat committed
178 179
}

180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
void Selector::_optimize(int recursionLevel){
	for (auto it=mElements.begin(); it!=mElements.end(); ++it){
		(*it)->optimize(recursionLevel);
	}
	TransitionMap *all=NULL;
	bool intersectionFound=false;
	for (auto it=mElements.begin(); it!=mElements.end() && !intersectionFound; ++it){
		TransitionMap *cur=new TransitionMap();
		(*it)->getTransitionMap(cur);
		if (all){
			if (cur->intersect(all)){
				intersectionFound=true;
			}
			all->merge(cur);
			delete cur;
		}else all=cur;
	}
	if (all) delete all;
	if (!intersectionFound){
		//cout<<"Selector '"<<getName()<<"' is exclusive."<<endl;
		mIsExclusive=true;
	}
}


205
ExclusiveSelector::ExclusiveSelector(){
206
	mIsExclusive=true;
207 208
}

209
size_t ExclusiveSelector::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
210
	return Selector::_feedExclusive(ctx, input, pos);
211 212 213
}


Simon Morlat's avatar
Simon Morlat committed
214 215 216 217 218
Sequence::Sequence(){
}

shared_ptr<Sequence> Sequence::addRecognizer(const shared_ptr<Recognizer> &element){
	mElements.push_back(element);
Simon Morlat's avatar
Simon Morlat committed
219
	return static_pointer_cast<Sequence>( shared_from_this());
Simon Morlat's avatar
Simon Morlat committed
220 221
}

222 223 224 225 226 227 228 229 230 231 232 233
bool Sequence::_getTransitionMap(TransitionMap* mask){
	bool isComplete=false;
	for (auto it=mElements.begin(); it!=mElements.end(); ++it){
		if ((*it)->getTransitionMap(mask)) {
			isComplete=true;
			break;
		}
	}
	return isComplete;
}


234
size_t Sequence::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
Simon Morlat's avatar
Simon Morlat committed
235 236 237 238
	size_t matched=0;
	size_t total=0;
	
	for (auto it=mElements.begin(); it!=mElements.end(); ++it){
Simon Morlat's avatar
Simon Morlat committed
239
		matched=(*it)->feed(ctx, input, pos);
Simon Morlat's avatar
Simon Morlat committed
240 241 242 243 244 245 246 247 248
		if (matched==string::npos){
			return string::npos;
		}
		pos+=matched;
		total+=matched;
	}
	return total;
}

249 250 251 252 253 254
void Sequence::_optimize(int recursionLevel){
	for (auto it=mElements.begin(); it!=mElements.end(); ++it)
		(*it)->optimize(recursionLevel);
}


Simon Morlat's avatar
Simon Morlat committed
255 256 257 258 259 260 261 262 263
Loop::Loop(){
	mMin=0;
	mMax=-1;
}

shared_ptr<Loop> Loop::setRecognizer(const shared_ptr<Recognizer> &element, int min, int max){
	mMin=min;
	mMax=max;
	mRecognizer=element;
Simon Morlat's avatar
Simon Morlat committed
264
	return static_pointer_cast<Loop>(shared_from_this());
Simon Morlat's avatar
Simon Morlat committed
265 266
}

267
size_t Loop::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
Simon Morlat's avatar
Simon Morlat committed
268 269 270 271 272
	size_t matched=0;
	size_t total=0;
	int repeat;
	
	for(repeat=0;mMax!=-1 ? repeat<mMax : true;repeat++){
Simon Morlat's avatar
Simon Morlat committed
273
		matched=mRecognizer->feed(ctx, input, pos);
Simon Morlat's avatar
Simon Morlat committed
274 275 276
		if (matched==string::npos) break;
		total+=matched;
		pos+=matched;
277
		if (input[pos]=='\0') break;
Simon Morlat's avatar
Simon Morlat committed
278 279 280 281 282
	}
	if (repeat<mMin) return string::npos;
	return total;
}

283 284 285 286 287 288 289 290 291 292
bool Loop::_getTransitionMap(TransitionMap* mask){
	mRecognizer->getTransitionMap(mask);
	return mMin!=0; //we must say to upper layer that this loop recognizer is allowed to be optional by returning FALSE
}

void Loop::_optimize(int recursionLevel){
	mRecognizer->optimize(recursionLevel);
}


293 294 295
CharRange::CharRange(int begin, int end) : mBegin(begin), mEnd(end){
}

296
size_t CharRange::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
297 298 299 300 301
	int c=input[pos];
	if (c>=mBegin && c<=mEnd) return 1;
	return string::npos;
}

302 303 304 305 306
void CharRange::_optimize(int recursionLevel){

}


307 308
shared_ptr<CharRecognizer> Foundation::charRecognizer(int character, bool caseSensitive){
	return make_shared<CharRecognizer>(character, caseSensitive);
Simon Morlat's avatar
Simon Morlat committed
309 310
}

311 312
shared_ptr<Selector> Foundation::selector(bool isExclusive){
	return isExclusive ? make_shared<ExclusiveSelector>() : make_shared<Selector>();
Simon Morlat's avatar
Simon Morlat committed
313 314 315 316 317 318 319 320 321 322
}

shared_ptr<Sequence> Foundation::sequence(){
	return make_shared<Sequence>();
}

shared_ptr<Loop> Foundation::loop(){
	return make_shared<Loop>();
}

323 324 325 326 327
Literal::Literal(const string& lit) : mLiteral(tolower(lit)){
	mLiteralSize=mLiteral.size();
}

size_t Literal::_feed(const shared_ptr< ParserContextBase >& ctx, const string& input, size_t pos){
Simon Morlat's avatar
Simon Morlat committed
328
	size_t i;
329 330
	for(i=0;i<mLiteralSize;++i){
		if (::tolower(input[pos+i])!=mLiteral[i]) return string::npos;
Simon Morlat's avatar
Simon Morlat committed
331
	}
332 333 334
	return mLiteralSize;
}

335 336 337 338 339 340 341 342 343 344
void Literal::_optimize(int recursionLevel){

}

bool Literal::_getTransitionMap(TransitionMap* mask){
	mask->mPossibleChars[::tolower(mLiteral[0])]=true;
	mask->mPossibleChars[::toupper(mLiteral[0])]=true;
	return true;
}

345 346
shared_ptr<Recognizer> Utils::literal(const string & lt){
	return make_shared<Literal>(lt);
Simon Morlat's avatar
Simon Morlat committed
347 348 349
}

shared_ptr<Recognizer> Utils::char_range(int begin, int end){
350
	return make_shared<CharRange>(begin, end);
Simon Morlat's avatar
Simon Morlat committed
351 352 353 354 355 356 357 358 359
}

RecognizerPointer::RecognizerPointer() : mRecognizer(NULL){
}

shared_ptr<Recognizer> RecognizerPointer::getPointed(){
	return mRecognizer;
}

360
size_t RecognizerPointer::_feed(const shared_ptr<ParserContextBase> &ctx, const string &input, size_t pos){
Simon Morlat's avatar
Simon Morlat committed
361
	if (mRecognizer){
Simon Morlat's avatar
Simon Morlat committed
362
		return mRecognizer->feed(ctx, input, pos);
Simon Morlat's avatar
Simon Morlat committed
363
	}else{
Simon Morlat's avatar
Simon Morlat committed
364
		cerr<<"RecognizerPointer with name '"<<mName<<"' is undefined"<<endl;
Simon Morlat's avatar
Simon Morlat committed
365 366 367 368 369 370 371 372 373
		abort();
	}
	return string::npos;
}

void RecognizerPointer::setPointed(const shared_ptr<Recognizer> &r){
	mRecognizer=r;
}

374 375 376 377 378 379
void RecognizerPointer::_optimize(int recursionLevel){
	/*do not call optimize() on the pointed value to avoid a loop.
	 * The grammar will do it for all rules anyway*/
}


Simon Morlat's avatar
Simon Morlat committed
380 381 382 383 384 385
Grammar::Grammar(const string& name) : mName(name){

}


void Grammar::assignRule(const string &argname, const shared_ptr<Recognizer> &rule){
386
	string name=tolower(argname);
Simon Morlat's avatar
Simon Morlat committed
387 388 389 390 391 392 393 394 395 396 397
	rule->setName(name);
	auto it=mRules.find(name);
	if (it!=mRules.end()){
		shared_ptr<RecognizerPointer> pointer=dynamic_pointer_cast<RecognizerPointer>((*it).second);
		if (pointer){
			pointer->setPointed(rule);
		}else{
			cerr<<"Error: rule '"<<name<<"' is being redefined !"<<endl;
			abort();
		}
	}
Simon Morlat's avatar
Simon Morlat committed
398 399
	/*in any case the map should contain real recognizers (not just pointers) */
	mRules[name]=rule;
Simon Morlat's avatar
Simon Morlat committed
400 401
}

402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
void Grammar::_extendRule(const string &argname, const shared_ptr<Recognizer> &rule){
	string name=tolower(argname);
	rule->setName("");
	auto it=mRules.find(name);
	if (it!=mRules.end()){
		shared_ptr<Selector> sel=dynamic_pointer_cast<Selector>((*it).second);
		if (sel){
			sel->addRecognizer(rule);
		}else{
			cerr<<"Error: rule '"<<name<<"' cannot be extended because it was not defined with a Selector."<<endl;
			abort();
		}
	}else{
		cerr<<"Error: rule '"<<name<<"' cannot be extended because it is not defined."<<endl;
		abort();
	}
}

420
shared_ptr<Recognizer> Grammar::findRule(const string &argname){
421
	string name=tolower(argname);
Simon Morlat's avatar
Simon Morlat committed
422 423
	auto it=mRules.find(name);
	if (it!=mRules.end()){
424 425 426 427 428 429 430 431 432 433
		return (*it).second;
	}
	return NULL;
}

shared_ptr<Recognizer> Grammar::getRule(const string &argname){
	shared_ptr<Recognizer> ret=findRule(argname);

	if (ret){
		shared_ptr<RecognizerPointer> pointer=dynamic_pointer_cast<RecognizerPointer>(ret);
Simon Morlat's avatar
Simon Morlat committed
434 435 436 437 438 439 440
		if (pointer){
			if (pointer->getPointed()){/*if pointer is defined return the pointed value directly*/
				return pointer->getPointed();
			}else{
				return pointer;
			}
		}
441
		return ret;
Simon Morlat's avatar
Simon Morlat committed
442 443
	}else{/*the rule doesn't exist yet: return a pointer*/
		ret=make_shared<RecognizerPointer>();
444
		string name=tolower(argname);
Simon Morlat's avatar
Simon Morlat committed
445
		ret->setName(string("@")+name);
Simon Morlat's avatar
Simon Morlat committed
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
		mRules[name]=ret;
	}
	return ret;
}

void Grammar::include(const shared_ptr<Grammar>& grammar){
	for(auto it=grammar->mRules.begin();it!=grammar->mRules.end();++it){
		if (mRules.find((*it).first)!=mRules.end()){
			cerr<<"Rule '"<<(*it).first<<"' is being redefined while including grammar '"<<grammar->mName<<"' into '"<<mName<<"'"<<endl;
		}
		mRules[(*it).first]=(*it).second;
	}
}

bool Grammar::isComplete()const{
	bool ret=true;
	for(auto it=mRules.begin(); it!=mRules.end(); ++it){
		shared_ptr<RecognizerPointer> pointer=dynamic_pointer_cast<RecognizerPointer>((*it).second);
		if (pointer && !pointer->getPointed()){
			cerr<<"Rule '"<<(*it).first<<"' is not defined."<<endl;
			ret=false;
		}
	}
	return ret;
}

472 473 474 475 476 477 478
void Grammar::optimize(){
	for(auto it=mRules.begin(); it!=mRules.end(); ++it){
		(*it).second->optimize();
	}
}


479 480 481 482 483
int Grammar::getNumRules() const{
	return mRules.size();
}


484
string tolower(const string &str){
Simon Morlat's avatar
Simon Morlat committed
485 486 487 488 489 490
	string ret(str);
	transform(ret.begin(),ret.end(), ret.begin(), ::tolower);
	return ret;
}

}