grammarbuilder.cpp 12.6 KB
Newer Older
1 2 3
/*
 * Copyright (C) 2017  Belledonne Communications SARL
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
18

Ronan's avatar
Ronan committed
19 20 21 22
#include <fstream>

#include <bctoolbox/logging.h>

Ronan's avatar
Ronan committed
23 24
#include "belr/abnf.h"
#include "belr/parser.h"
Ronan's avatar
Ronan committed
25
#include "belr/grammarbuilder.h"
26

27 28
#include "config.h"

29
using namespace std;
30

Ronan's avatar
Ronan committed
31 32
// =============================================================================

33
namespace belr{
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
/* Factory: returns a new ABNFNumval held by a shared_ptr. */
shared_ptr<ABNFNumval> ABNFNumval::create(){
	shared_ptr<ABNFNumval> numval = make_shared<ABNFNumval>();
	return numval;
}

/*
 * Builds the recognizer for this numeric value.
 * A range yields a char_range between the two stored values; otherwise a sequence
 * holding one char recognizer per stored value is returned.
 * The grammar argument is not used here.
 */
shared_ptr<Recognizer> ABNFNumval::buildRecognizer(const shared_ptr<Grammar> &grammar){
	if (!mIsRange){
		auto chars = Foundation::sequence();
		for (const auto &value : mValues){
			/* NOTE(review): the 'true' flag is presumably case-sensitivity - confirm against Foundation::charRecognizer. */
			chars->addRecognizer(Foundation::charRecognizer(value, true));
		}
		return chars;
	}
	return Utils::char_range(mValues[0], mValues[1]);
}

void ABNFNumval::parseValues(const string &val, int base){
	size_t dash=val.find('-');
	if (dash!=string::npos){
		mIsRange=true;
		string first=val.substr(1,dash-1);
		string last=val.substr(dash+1,string::npos);
Ronan's avatar
Ronan committed
56 57
		mValues.push_back(strtol(first.c_str(),nullptr,base));
		mValues.push_back(strtol(last.c_str(),nullptr,base));
58 59 60 61
	}else{
		mIsRange=false;
		string tmp=val.substr(1,string::npos);
		const char *s=tmp.c_str();
Ronan's avatar
Ronan committed
62
		char *endptr=nullptr;
63 64
		do{
			long lv=strtol(s,&endptr,base);
65 66 67
			if (lv == 0 && s == endptr) {
				break;
			}
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
			if (*endptr=='.') s=endptr+1;
			else s=endptr;
			mValues.push_back(lv);
		}while(*s!='\0');
	}
}

/* Parses decval as base-10 value(s). */
void ABNFNumval::setDecVal(const string &decval){
	const int decimalBase = 10;
	parseValues(decval, decimalBase);
}

/* Parses hexval as base-16 value(s). */
void ABNFNumval::setHexVal(const string &hexval){
	const int hexadecimalBase = 16;
	parseValues(hexval, hexadecimalBase);
}

/* Parses binval as base-2 value(s). */
void ABNFNumval::setBinVal(const string &binval){
	const int binaryBase = 2;
	parseValues(binval, binaryBase);
}

/* An option is its alternation wrapped in a loop with a minimum of 0 and a maximum of 1 occurrence. */
shared_ptr<Recognizer> ABNFOption::buildRecognizer(const shared_ptr<Grammar> &grammar){
	auto inner = mAlternation->buildRecognizer(grammar);
	return Foundation::loop()->setRecognizer(inner, 0, 1);
}

/* Factory: returns a new ABNFOption held by a shared_ptr. */
shared_ptr<ABNFOption> ABNFOption::create(){
	shared_ptr<ABNFOption> option = make_shared<ABNFOption>();
	return option;
}

/* Stores the alternation that this option wraps. */
void ABNFOption::setAlternation(const shared_ptr<ABNFAlternation> &a){
	mAlternation = a;
}

Simon Morlat's avatar
Simon Morlat committed
99 100 101 102 103 104 105 106 107 108 109 110 111
/* Factory: returns a new ABNFGroup held by a shared_ptr. */
shared_ptr<ABNFGroup> ABNFGroup::create(){
	shared_ptr<ABNFGroup> group = make_shared<ABNFGroup>();
	return group;
}

/* A group adds nothing of its own: it yields the recognizer of its alternation. */
shared_ptr<Recognizer> ABNFGroup::buildRecognizer(const shared_ptr<Grammar> &grammar){
	shared_ptr<Recognizer> inner = mAlternation->buildRecognizer(grammar);
	return inner;
}

/* Stores the alternation enclosed by this group. */
void ABNFGroup::setAlternation(const shared_ptr<ABNFAlternation> &a){
	mAlternation = a;
}

/*
 * Builds the recognizer for this element, trying in order:
 *  1. the nested builder, if one was set;
 *  2. the rule referenced by name, looked up in the grammar;
 *  3. the character value: a single char recognizer for one character, a literal otherwise.
 * If none of these is set, a fatal error is logged.
 */
shared_ptr<Recognizer> ABNFElement::buildRecognizer(const shared_ptr<Grammar> &grammar){
	if (mElement){
		return mElement->buildRecognizer(grammar);
	}
	if (!mRulename.empty()){
		return grammar->getRule(mRulename);
	}
	if (mCharVal.empty()){
		bctbx_fatal("ABNFElement::buildRecognizer is empty, should not happen!");
		return nullptr;
	}
	if (mCharVal.size() == 1){
		return Foundation::charRecognizer(mCharVal[0], false);
	}
	return Utils::literal(mCharVal);
}

/* Factory: returns a new ABNFElement held by a shared_ptr. */
shared_ptr<ABNFElement> ABNFElement::create(){
	shared_ptr<ABNFElement> element = make_shared<ABNFElement>();
	return element;
}

/* Stores the nested builder that buildRecognizer() delegates to when it is set. */
void ABNFElement::setElement(const shared_ptr<ABNFBuilder> &e){
	mElement = e;
}

/* Stores the name of the rule this element refers to. */
void ABNFElement::setRulename(const string &rulename){
	mRulename = rulename;
}

138 139 140 141 142 143
/* Stores the character value without its first and last characters (the surrounding quotes). */
void ABNFElement::setCharVal(const string &charval){
	const size_t innerLength = charval.size() - 2;
	mCharVal = charval.substr(1, innerLength);
}

void ABNFElement::setProseVal(const string& prose){
	if (!prose.empty()){
144
		bctbx_fatal("prose-val is not supported.");
145 146 147
	}
}

Simon Morlat's avatar
Simon Morlat committed
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
/* Factory: returns a new ABNFRepetition held by a shared_ptr. */
shared_ptr<ABNFRepetition> ABNFRepetition::create(){
	shared_ptr<ABNFRepetition> repetition = make_shared<ABNFRepetition>();
	return repetition;
}

/* Stores the exact repeat count; buildRecognizer() treats -1 as "no exact count". */
void ABNFRepetition::setCount(int count){
	mCount = count;
}

/* Stores the minimum number of occurrences. */
void ABNFRepetition::setMin(int min){
	mMin = min;
}

/* Stores the maximum number of occurrences. */
void ABNFRepetition::setMax(int max){
	mMax = max;
}

/* Stores the raw repeat specifier; when it stays empty, buildRecognizer() adds no loop. */
void ABNFRepetition::setRepeat(const string &r){
	mRepeat = r;
}

168 169 170
/* Stores the element being repeated. */
void ABNFRepetition::setElement(const shared_ptr<ABNFElement> &e){
	mElement = e;
}
Simon Morlat's avatar
Simon Morlat committed
171 172 173 174 175 176 177 178 179 180

/*
 * Builds the recognizer for the repeated element.
 * Without a repeat specifier the element's recognizer is returned as is.
 * Otherwise it is wrapped in a loop: exactly mCount occurrences when a count was given
 * (mCount != -1), between mMin and mMax occurrences otherwise.
 */
shared_ptr<Recognizer> ABNFRepetition::buildRecognizer(const shared_ptr<Grammar> &grammar){
	shared_ptr<Recognizer> inner = mElement->buildRecognizer(grammar);
	if (mRepeat.empty()) return inner;

	const bool hasExactCount = (mCount != -1);
	const int lowest = hasExactCount ? mCount : mMin;
	const int highest = hasExactCount ? mCount : mMax;
	return Foundation::loop()->setRecognizer(inner, lowest, highest);
}

181 182
/* Factory: returns a new ABNFConcatenation held by a shared_ptr. */
shared_ptr<ABNFConcatenation> ABNFConcatenation::create(){
	shared_ptr<ABNFConcatenation> concatenation = make_shared<ABNFConcatenation>();
	return concatenation;
}
184 185

/*
 * Builds the recognizer for the concatenation.
 * A single repetition is returned directly, without a wrapping sequence; several repetitions
 * are chained in order inside a sequence. Having no repetition at all is a fatal error.
 */
shared_ptr<Recognizer> ABNFConcatenation::buildRecognizer(const shared_ptr<Grammar> &grammar){
	if (mRepetitions.empty()){
		bctbx_fatal("No repetitions set !");
	}
	if (mRepetitions.size() == 1){
		return mRepetitions.front()->buildRecognizer(grammar);
	}
	auto chain = Foundation::sequence();
	for (const auto &repetition : mRepetitions){
		chain->addRecognizer(repetition->buildRecognizer(grammar));
	}
	return chain;
}

Simon Morlat's avatar
Simon Morlat committed
201 202 203 204
/* Appends a repetition; order of insertion is the order used when building the sequence. */
void ABNFConcatenation::addRepetition(const shared_ptr<ABNFRepetition> &r){
	mRepetitions.push_back(r);
}

205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
/* Factory: returns a new ABNFAlternation held by a shared_ptr. */
shared_ptr<ABNFAlternation> ABNFAlternation::create(){
	shared_ptr<ABNFAlternation> alternation = make_shared<ABNFAlternation>();
	return alternation;
}

/* Appends one alternative (a concatenation) to this alternation. */
void ABNFAlternation::addConcatenation(const shared_ptr<ABNFConcatenation> &c){
	mConcatenations.push_back(c);
}

/*
 * Builds the recognizer for the alternation.
 * With exactly one alternative, its recognizer is returned directly (no selector);
 * every other case goes through buildRecognizerNoOptim().
 */
shared_ptr<Recognizer> ABNFAlternation::buildRecognizer(const shared_ptr<Grammar> &grammar){
	if (mConcatenations.size() != 1){
		return buildRecognizerNoOptim(grammar);
	}
	return mConcatenations.front()->buildRecognizer(grammar);
}

/* Always builds a selector holding the recognizer of each alternative, in insertion order. */
shared_ptr<Recognizer> ABNFAlternation::buildRecognizerNoOptim(const shared_ptr<Grammar> &grammar){
	auto choices = Foundation::selector();
	for (const auto &concatenation : mConcatenations){
		choices->addRecognizer(concatenation->buildRecognizer(grammar));
	}
	return choices;
}

/* Factory: returns a new ABNFRule held by a shared_ptr. */
shared_ptr<ABNFRule> ABNFRule::create(){
	shared_ptr<ABNFRule> rule = make_shared<ABNFRule>();
	return rule;
}

void ABNFRule::setName(const string& name){
231
	if (!mName.empty()) bctbx_error("Rule %s is renamed !!!!!", name.c_str());
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
	mName=name;
}

/* Stores the alternation that makes up the body of the rule. */
void ABNFRule::setAlternation(const shared_ptr<ABNFAlternation> &a){
	mAlternation = a;
}

/* A rule is an extension when its "defined-as" text contains a '/'. */
bool ABNFRule::isExtension()const{
	const size_t slashPos = mDefinedAs.find('/');
	return slashPos != string::npos;
}

/* The recognizer of a rule is the recognizer of its alternation. */
shared_ptr<Recognizer> ABNFRule::buildRecognizer(const shared_ptr<Grammar> &grammar){
	shared_ptr<Recognizer> body = mAlternation->buildRecognizer(grammar);
	return body;
}

/* Stores the "defined-as" text of the rule; isExtension() inspects it. */
void ABNFRule::setDefinedAs(const string &defined_as){
	mDefinedAs = defined_as;
}


/* Factory: returns a new ABNFRuleList held by a shared_ptr. */
shared_ptr<ABNFRuleList> ABNFRuleList::create(){
	shared_ptr<ABNFRuleList> ruleList = make_shared<ABNFRuleList>();
	return ruleList;
}

256 257
/* Appends a rule; rules are later added to the grammar in insertion order. */
void ABNFRuleList::addRule(const shared_ptr<ABNFRule> &rule){
	mRules.push_back(rule);
}

260 261 262 263 264 265
/*
 * Adds every rule of the list to the grammar, in order.
 * Extension rules extend the existing grammar entry of the same name; other rules are added as new entries.
 * Always returns nullptr: the result of the build is the content added to the grammar.
 */
shared_ptr<Recognizer> ABNFRuleList::buildRecognizer(const shared_ptr<Grammar> &grammar){
	for (const shared_ptr<ABNFRule> &rule : mRules){
		shared_ptr<Recognizer> recognizer = rule->buildRecognizer(grammar);
		if (rule->isExtension()){
			grammar->extendRule(rule->getName(), recognizer);
			continue;
		}
		/*
		 * Only recognizers already added to the grammar carry a name. A named recognizer whose name
		 * differs from this rule's name comes from a statement like "rule2 = rule1": adding it again
		 * would leave its own name out of sync with the grammar entry, so it is wrapped in a
		 * RecognizerAlias instead.
		 */
		const bool isOtherRule = !recognizer->getName().empty() && recognizer->getName() != rule->getName();
		if (isOtherRule){
			auto alias = make_shared<RecognizerAlias>();
			alias->setPointed(recognizer);
			recognizer = alias;
		}
		grammar->addRule(rule->getName(), recognizer);
	}
	return nullptr;
}

286 287
/*
 * Sets up the parser on top of an ABNFGrammar and registers, for each ABNF rule name,
 * a creation function as handler plus member setters as collectors keyed by sub-rule name.
 */
ABNFGrammarBuilder::ABNFGrammarBuilder() : mParser(make_shared<ABNFGrammar>()){
	/* Rule list and rules. */
	mParser.setHandler("rulelist", make_fn(&ABNFRuleList::create))
		->setCollector("rule", make_sfn(&ABNFRuleList::addRule));

	mParser.setHandler("rule", make_fn(&ABNFRule::create))
		->setCollector("rulename", make_sfn(&ABNFRule::setName))
		->setCollector("defined-as", make_sfn(&ABNFRule::setDefinedAs))
		->setCollector("alternation", make_sfn(&ABNFRule::setAlternation));

	/* Alternation > concatenation > repetition > element. */
	mParser.setHandler("alternation", make_fn(&ABNFAlternation::create))
		->setCollector("concatenation", make_sfn(&ABNFAlternation::addConcatenation));

	mParser.setHandler("concatenation", make_fn(&ABNFConcatenation::create))
		->setCollector("repetition", make_sfn(&ABNFConcatenation::addRepetition));

	mParser.setHandler("repetition", make_fn(&ABNFRepetition::create))
		->setCollector("repeat", make_sfn(&ABNFRepetition::setRepeat))
		->setCollector("repeat-min", make_sfn(&ABNFRepetition::setMin))
		->setCollector("repeat-max", make_sfn(&ABNFRepetition::setMax))
		->setCollector("repeat-count", make_sfn(&ABNFRepetition::setCount))
		->setCollector("element", make_sfn(&ABNFRepetition::setElement));

	/* NOTE(review): "prose-val" is collected through setElement although ABNFElement::setProseVal exists - confirm this is intended. */
	mParser.setHandler("element", make_fn(&ABNFElement::create))
		->setCollector("rulename", make_sfn(&ABNFElement::setRulename))
		->setCollector("group", make_sfn(&ABNFElement::setElement))
		->setCollector("option", make_sfn(&ABNFElement::setElement))
		->setCollector("char-val", make_sfn(&ABNFElement::setCharVal))
		->setCollector("num-val", make_sfn(&ABNFElement::setElement))
		->setCollector("prose-val", make_sfn(&ABNFElement::setElement));

	/* Nested constructs. */
	mParser.setHandler("group", make_fn(&ABNFGroup::create))
		->setCollector("alternation", make_sfn(&ABNFGroup::setAlternation));

	mParser.setHandler("option", make_fn(&ABNFOption::create))
		->setCollector("alternation", make_sfn(&ABNFOption::setAlternation));

	/* Numeric values in their three notations. */
	mParser.setHandler("num-val", make_fn(&ABNFNumval::create))
		->setCollector("bin-val", make_sfn(&ABNFNumval::setBinVal))
		->setCollector("hex-val", make_sfn(&ABNFNumval::setHexVal))
		->setCollector("dec-val", make_sfn(&ABNFNumval::setDecVal));
}

321
/*
 * Parses an ABNF grammar definition and adds the resulting rules to a grammar.
 *
 * abnf: text of the grammar definition, parsed starting from the "rulelist" rule.
 * gram: optional grammar that receives the rules; when null, a new Grammar is created
 *       (with the abnf string as its constructor argument).
 *
 * Returns the grammar that received the rules (gram when provided, the new one otherwise),
 * or nullptr if no builder could be created or if the input was not parsed entirely.
 * A complete grammar is optimized before being returned; an incomplete one only triggers a warning.
 */
shared_ptr<Grammar> ABNFGrammarBuilder::createFromAbnf(const string &abnf, const shared_ptr<Grammar> &gram){
	size_t parsed = 0;
	shared_ptr<ABNFBuilder> builder = mParser.parseInput("rulelist", abnf, &parsed);
	if (!builder){
		bctbx_error("Failed to create builder.");
		return nullptr;
	}

	if (parsed < abnf.size()){
		bctbx_error("Only %llu bytes parsed over a total of %llu.", (unsigned long long)parsed, (unsigned long long) abnf.size());
		return nullptr;
	}

	/* Rules go into the caller's grammar when one is supplied, into a fresh one otherwise. */
	shared_ptr<Grammar> retGram;
	if (gram == nullptr) retGram = make_shared<Grammar>(abnf);
	else retGram = gram;

	builder->buildRecognizer(retGram);
	bctbx_message("Succesfully created grammar with %i rules.", retGram->getNumRules());
	if (retGram->isComplete()){
		bctbx_message("Grammar is complete.");
		retGram->optimize();
		bctbx_message("Grammar has been optimized.");
	}else{
		bctbx_warning("Grammar is not complete.");
	}
	/* Return retGram, not gram: gram is null whenever the grammar was created here. */
	return retGram;
}

350 351 352
/*
 * Reads the whole file at path and hands its content to createFromAbnf() together with gram.
 * Returns nullptr (after logging an error) when the file cannot be opened.
 */
shared_ptr<Grammar> ABNFGrammarBuilder::createFromAbnfFile(const string &path, const shared_ptr<Grammar> &gram){
	ifstream input(path);
	if (input.is_open()){
		stringstream content;
		content << input.rdbuf();
		return createFromAbnf(content.str(), gram);
	}
	bctbx_error("Could not open %s", path.c_str());
	return nullptr;
}

361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
/* Storage for the GrammarLoader singleton pointer; it starts out null and is filled in by GrammarLoader::get(). */
GrammarLoader * GrammarLoader::sInstance = nullptr;

/* Seeds the system search paths with the two build-time grammar directories, in this order. */
GrammarLoader::GrammarLoader(){
	mSystemPaths.emplace_back(BELR_GRAMMARS_DIR);
	mSystemPaths.emplace_back(BELR_GRAMMARS_RELATIVE_DIR);
}

/* Returns the single GrammarLoader instance, creating it on first call. */
GrammarLoader &GrammarLoader::get(){
	if (!sInstance) sInstance = new GrammarLoader();
	return *sInstance;
}

/* Adds an application search path at the front of the list, so the most recently added path is searched first. */
void GrammarLoader::addPath(const string &path){
	mAppPaths.emplace_front(path);
}

/* Removes every application search path; the system paths are left untouched. */
void GrammarLoader::clear(){
	mAppPaths.clear();
}

/*
 * Searches the given directories, in order, for fileName.
 * Returns "<directory>/<fileName>" for the first candidate for which bctbx_file_exist() returns 0,
 * or an empty string when there is none.
 */
string GrammarLoader::lookup(const string &fileName, const list<string> & paths){
	for (const string &directory : paths){
		const string candidate = directory + "/" + fileName;
		if (bctbx_file_exist(candidate.c_str()) == 0) return candidate;
	}
	return string();
}

/*
 * Tells whether fileName is an absolute path: it starts with '/', or, on Windows only,
 * it is longer than two characters and has ':' as second character (drive letter).
 */
bool GrammarLoader::isAbsolutePath(const string &fileName){
#ifdef _WIN32
	const bool hasDriveLetter = fileName.size() > 2 && fileName[1] == ':';
#else
	const bool hasDriveLetter = false;
#endif
	return fileName[0] == '/' || hasDriveLetter;
}

/*
 * Locates and loads a grammar file.
 * An absolute fileName is used as is; otherwise it is searched in the application paths,
 * then in the system paths. Returns the loaded grammar, or nullptr when the file cannot
 * be located (an error is logged) or when Grammar::load() does not return 0.
 */
shared_ptr<Grammar> GrammarLoader::load(const string &fileName){
	string resolved = isAbsolutePath(fileName) ? fileName : string();
	if (resolved.empty()) resolved = lookup(fileName, mAppPaths);
	if (resolved.empty()) resolved = lookup(fileName, mSystemPaths);
	if (resolved.empty()){
		bctbx_error("Could not load grammar %s because the file could not be located.", fileName.c_str());
		return nullptr;
	}

	auto grammar = make_shared<Grammar>(fileName);
	if (grammar->load(resolved) != 0) return nullptr;
	return grammar;
}

424
}//end of namespace