Commit a9f7564e authored by Simon Morlat

parser mechanism seems to work great.

A few optimizations.
parent 6912b9fc
......@@ -3,5 +3,6 @@ lib_LTLIBRARIES=libbelr.la
libbelr_la_SOURCES=belr.cc belr.hh \
abnf.cc abnf.hh \
parser.cc parser.hh
parser.cc parser.hh \
grammarbuilder.cc grammarbuilder.hh
......@@ -25,7 +25,7 @@ CoreRules::CoreRules() : Grammar("core rules"){
}
void CoreRules::alpha(){
shared_ptr<Selector> selector=make_shared<Selector>();
shared_ptr<Selector> selector=Foundation::selector(true);
selector->addRecognizer(Utils::char_range('a','z'));
selector->addRecognizer(Utils::char_range('A','Z'));
......@@ -34,7 +34,7 @@ void CoreRules::alpha(){
}
void CoreRules::bit(){
shared_ptr<Selector> selector=make_shared<Selector>();
shared_ptr<Selector> selector=Foundation::selector(true);
selector->addRecognizer(make_shared<CharRecognizer>('0'));
selector->addRecognizer(make_shared<CharRecognizer>('1'));
addRule("bit",selector);
......@@ -60,7 +60,7 @@ void CoreRules::crlf(){
void CoreRules::ctl(){
addRule("ctl",
Foundation::selector()
Foundation::selector(true)
->addRecognizer(Utils::char_range(0x00, 0x1f))
->addRecognizer(Foundation::charRecognizer(0x7f,true))
);
......@@ -75,7 +75,7 @@ void CoreRules::dquote(){
}
void CoreRules::hexdig(){
addRule("hexdig", Foundation::selector()
addRule("hexdig", Foundation::selector(true)
->addRecognizer(getRule("digit"))
->addRecognizer(Foundation::charRecognizer('A'))
->addRecognizer(Foundation::charRecognizer('B'))
......@@ -103,14 +103,14 @@ void CoreRules::vchar(){
}
void CoreRules::wsp(){
addRule("wsp", Foundation::selector()
addRule("wsp", Foundation::selector(true)
->addRecognizer(getRule("sp"))
->addRecognizer(getRule("htab"))
);
}
void CoreRules::lwsp(){
addRule("lwsp", Foundation::loop()->setRecognizer(Foundation::selector()
addRule("lwsp", Foundation::loop()->setRecognizer(Foundation::selector(true)
->addRecognizer(getRule("wsp"))
->addRecognizer(Foundation::sequence()
->addRecognizer(getRule("crlf"))
......@@ -154,7 +154,7 @@ void ABNFGrammar::comment(){
->addRecognizer(Foundation::charRecognizer(';',true))
->addRecognizer(
Foundation::loop()->setRecognizer(
Foundation::selector()
Foundation::selector(true)
->addRecognizer(getRule("wsp"))
->addRecognizer(getRule("vchar"))
)
......@@ -173,7 +173,8 @@ void ABNFGrammar::c_wsp(){
addRule("c-wsp",Foundation::selector()
->addRecognizer(getRule("wsp"))
->addRecognizer(Foundation::sequence()
->addRecognizer(getRule("c-nl"))->addRecognizer(getRule("wsp"))
->addRecognizer(getRule("c-nl"))
->addRecognizer(getRule("wsp"))
)
);
}
......@@ -183,9 +184,10 @@ void ABNFGrammar::rulename(){
addRule("rulename", Foundation::sequence()
->addRecognizer(getRule("alpha"))
->addRecognizer(Foundation::loop()->setRecognizer(
Foundation::selector()->addRecognizer(getRule("alpha"))
->addRecognizer(getRule("digit"))
->addRecognizer(Foundation::charRecognizer('-'))
Foundation::selector(true)
->addRecognizer(getRule("alpha"))
->addRecognizer(getRule("digit"))
->addRecognizer(Foundation::charRecognizer('-'))
)
)
);
......@@ -369,7 +371,7 @@ void ABNFGrammar::char_val(){
->addRecognizer(getRule("dquote"))
->addRecognizer(
Foundation::loop()->setRecognizer(
Foundation::selector()
Foundation::selector(true)
->addRecognizer(Utils::char_range(0x20,0x21))
->addRecognizer(Utils::char_range(0x23,0x7e))
)
......@@ -385,7 +387,7 @@ void ABNFGrammar::num_val(){
addRule("num-val", Foundation::sequence()
->addRecognizer(Foundation::charRecognizer('%'))
->addRecognizer(
Foundation::selector()
Foundation::selector(true)
->addRecognizer(getRule("bin-val"))
->addRecognizer(getRule("dec-val"))
->addRecognizer(getRule("hex-val"))
......@@ -397,12 +399,11 @@ void ABNFGrammar::num_val(){
* prose-val = "<" *(%x20-3D / %x3F-7E) ">"
*/
void ABNFGrammar::prose_val(){
shared_ptr<Sequence> seq=make_shared<Sequence>();
addRule("prose-val", Foundation::sequence()
->addRecognizer(Foundation::charRecognizer('<'))
->addRecognizer(
Foundation::loop()->setRecognizer(
Foundation::selector()
Foundation::selector(true)
->addRecognizer(Utils::char_range(0x20,0x3d))
->addRecognizer(Utils::char_range(0x3f,0x7e))
)
......
......@@ -61,23 +61,42 @@ shared_ptr<Selector> Selector::addRecognizer(const shared_ptr<Recognizer> &r){
size_t Selector::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
size_t matched=0;
size_t bestmatch=0;
shared_ptr<ParserContext> bestCtx;
shared_ptr<HandlerContext> bestBranch;
for (auto it=mElements.begin(); it!=mElements.end(); ++it){
shared_ptr<ParserContext> currentCtx=make_shared<ParserContext>();
matched=(*it)->feed(currentCtx, input, pos);
auto br=ctx->branch();
matched=(*it)->feed(ctx, input, pos);
if (matched!=string::npos && matched>bestmatch) {
bestmatch=matched;
bestCtx=currentCtx;
if (bestBranch) ctx->removeBranch(bestBranch);
bestBranch=br;
}else{
ctx->removeBranch(br);
}
}
if (bestmatch==0) return string::npos;
if (bestmatch!=string::npos){
ctx->push(bestCtx);
ctx->merge(bestBranch);
}
return bestmatch;
}
/* Default constructor: performs no initialisation of its own. */
ExclusiveSelector::ExclusiveSelector()
{
}
/*
 * Tries the alternatives in the order they are stored in mElements and returns
 * the length reported by the first one that matches a non-empty span; the
 * remaining alternatives are not evaluated.
 * Returns string::npos when no alternative matches (an empty match counts as
 * no match).
 */
size_t ExclusiveSelector::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
	for (const auto &alternative : mElements){
		const size_t consumed=alternative->feed(ctx, input, pos);
		if (consumed==string::npos || consumed==0) continue;
		return consumed;
	}
	return string::npos;
}
/* Default constructor: performs no initialisation of its own. */
Sequence::Sequence()
{
}
......@@ -128,12 +147,21 @@ size_t Loop::_feed(const shared_ptr<ParserContext> &ctx, const string &input, si
return total;
}
/* Stores the two bounds of the character-code range; _feed() treats both as inclusive. */
CharRange::CharRange(int begin, int end)
	: mBegin(begin)
	, mEnd(end)
{
}
size_t CharRange::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
int c=input[pos];
if (c>=mBegin && c<=mEnd) return 1;
return string::npos;
}
/* Factory helper: allocates a CharRecognizer built from 'character' and the 'caseSensitive' flag. */
shared_ptr<CharRecognizer> Foundation::charRecognizer(int character, bool caseSensitive){
	shared_ptr<CharRecognizer> recognizer=make_shared<CharRecognizer>(character, caseSensitive);
	return recognizer;
}
shared_ptr<Selector> Foundation::selector(){
return make_shared<Selector>();
shared_ptr<Selector> Foundation::selector(bool isExclusive){
return isExclusive ? make_shared<ExclusiveSelector>() : make_shared<Selector>();
}
shared_ptr<Sequence> Foundation::sequence(){
......@@ -154,13 +182,7 @@ shared_ptr<Recognizer> Utils::literal(const string & lt){
}
shared_ptr<Recognizer> Utils::char_range(int begin, int end){
auto sel=Foundation::selector();
for(int i=begin; i<=end; i++){
sel->addRecognizer(
Foundation::charRecognizer(i,true)
);
}
return sel;
return make_shared<CharRange>(begin, end);
}
RecognizerPointer::RecognizerPointer() : mRecognizer(NULL){
......@@ -254,29 +276,4 @@ string tolower(const string &str){
return ret;
}
/* Records the recognizer involved together with the begin/count pair handed in. */
ParserContext::Element::Element(const shared_ptr<Recognizer> &recognizer, size_t begin, size_t count)
	: mRecognizer(recognizer)
	, mBegin(begin)
	, mCount(count)
{
}
/* Default constructor: performs no initialisation of its own. */
ParserContext::ParserContext()
{
}
void ParserContext::addParsingEvent(const shared_ptr<Recognizer>& recognizer, size_t begin, size_t end){
mEvents.push_back(Element(recognizer, begin, end));
}
/* Moves every event of 'ctx' to the end of this context's event list; 'ctx' is left with no events. */
void ParserContext::push(const shared_ptr< ParserContext >& ctx){
	auto &incoming=ctx->mEvents;
	mEvents.splice(mEvents.end(), incoming);
}
/* Read-only access to the list of events stored in this context. */
const list<ParserContext::Element> &ParserContext::getEvents() const
{
	return mEvents;
}
/* Number of events currently stored in this context. */
size_t ParserContext::size() const
{
	return mEvents.size();
}
}
......@@ -34,15 +34,32 @@ private:
bool mCaseSensitive;
};
/*
 * Recognizer built from a pair of character-code bounds.
 * It is an optimization of a Selector holding one individual CharRecognizer
 * per character of the range.
 */
class CharRange : public Recognizer{
public:
	CharRange(int begin, int end);
private:
	virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
	int mBegin;
	int mEnd;
};
class Selector : public Recognizer{
public:
Selector();
shared_ptr<Selector> addRecognizer(const shared_ptr<Recognizer> &element);
private:
protected:
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
list<shared_ptr<Recognizer>> mElements;
};
/**
 * Optimized variant of Selector for the case where there can be only a single
 * matching alternative: it overrides _feed() and otherwise inherits the
 * Selector interface.
 */
class ExclusiveSelector : public Selector{
public:
ExclusiveSelector();
private:
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
};
class Sequence : public Recognizer{
public:
Sequence();
......@@ -66,7 +83,7 @@ private:
class Foundation{
public:
static shared_ptr<CharRecognizer> charRecognizer(int character, bool caseSensitive=false);
static shared_ptr<Selector> selector();
static shared_ptr<Selector> selector(bool isExclusive=false);
static shared_ptr<Sequence> sequence();
static shared_ptr<Loop> loop();
};
......
#include "abnf.hh"
#include "grammarbuilder.hh"
#include <iostream>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
namespace belr{
/*
 * Creates the internal parser over a fresh ABNFGrammar and registers the
 * builder's callbacks on it:
 *  - "rulelist" is handled by createRuleList(), and each "rule" found under it
 *    is handed to addRule();
 *  - "rule" is handled by createRule().
 */
ABNFGrammarBuilder::ABNFGrammarBuilder()
	: mParser(make_shared<ABNFGrammar>())
{
	auto ruleListHandler=mParser.setHandler("rulelist", bind(&ABNFGrammarBuilder::createRuleList, this));
	ruleListHandler->setCollector<void*>("rule",
		bind(&ABNFGrammarBuilder::addRule, this, placeholders::_1, placeholders::_2));

	mParser.setHandler("rule", bind(&ABNFGrammarBuilder::createRule, this));
}
/*
 * Reads the ABNF grammar file at 'path' and feeds its content to the internal
 * parser, starting from the "rulelist" rule.
 *
 * Diagnostics are written to cerr when the file cannot be opened, inspected or
 * read completely, when parsing fails, and when only part of the file was
 * parsed.
 *
 * Returns a null pointer on every path: no Grammar object is built from the
 * parse result yet.
 */
shared_ptr<Grammar> ABNFGrammarBuilder::createFromAbnf(const string &path){
	/* Open first and query the size through the descriptor, so that the size
	 * and the data are guaranteed to come from the same file. */
	const int fd=open(path.c_str(),O_RDONLY);
	if (fd==-1){
		cerr<<"Could not open "<<path<<endl;
		return nullptr;
	}
	struct stat sb;
	if (fstat(fd,&sb)==-1){
		cerr<<"Could not stat "<<path<<endl;
		close(fd);
		return nullptr;
	}

	/* Read straight into the string: no manual buffer to free on error paths. */
	const size_t fileSize=static_cast<size_t>(sb.st_size);
	string sgrammar(fileSize,'\0');
	size_t total=0;
	while (total<fileSize){
		/* read() may legitimately return fewer bytes than requested. */
		const ssize_t count=read(fd,&sgrammar[total],fileSize-total);
		if (count<=0) break;
		total+=static_cast<size_t>(count);
	}
	close(fd);
	if (total!=fileSize){
		cerr<<"Could not read "<<path<<endl;
		return nullptr;
	}

	size_t parsed=0;
	mParser.parseInput("rulelist",sgrammar,&parsed);
	if (parsed==string::npos){
		/* npos is the largest size_t, so it must be tested before the
		 * "less than file size" comparison or a failed parse goes unnoticed. */
		cerr<<"Could not parse "<<path<<endl;
	}else if (parsed<fileSize){
		cerr<<"Only "<<parsed<<" bytes parsed over a total of "<< sb.st_size <<endl;
	}
	return nullptr;
}
/* Handler registered for "rule": logs the creation and returns the placeholder value 0x1. */
void *ABNFGrammarBuilder::createRule(){
	void *const placeholder=reinterpret_cast<void*>(0x1);
	cout<<"Rule created"<<endl;
	return placeholder;
}
/* Handler registered for "rulelist": logs the creation and returns the placeholder value 0x2. */
void *ABNFGrammarBuilder::createRuleList(){
	void *const placeholder=reinterpret_cast<void*>(0x2);
	cout<<"RuleList created"<<endl;
	return placeholder;
}
/* Collector registered for "rule" under "rulelist": logs both pointer values, nothing else. */
void ABNFGrammarBuilder::addRule(void *list, void *rule){
	cout<<"Rule "<<rule;
	cout<<" added to rule list "<<list<<endl;
}
}//end of namespace
#ifndef grammarbuilder_hh
#define grammarbuilder_hh
#include "parser.hh"
namespace belr{
/*
 * Owns a Parser and exposes createFromAbnf(), which takes the path of an ABNF
 * file and returns a Grammar pointer. The private members are the callbacks
 * invoked by the parser.
 */
class ABNFGrammarBuilder{
public:
	ABNFGrammarBuilder();
	/* Takes the path of an ABNF grammar file. */
	shared_ptr<Grammar> createFromAbnf(const string &path);
private:
	/* Parser callbacks. */
	void *createRuleList();
	void *createRule();
	void addRule(void *list, void *rule);

	Parser mParser;
};
}
#endif
#include <parser.hh>
#include <iostream>
#include <algorithm>
namespace belr{
/* Out-of-line destructor with an empty body. */
CollectorBase::~CollectorBase()
{
}
ParserContext::ParserContext(const shared_ptr<Parser> &parser) : mParser(parser), mRootObject(NULL){
void Assignment::invoke(void *parent, const string &input){
if (mChild){
shared_ptr<ParserCollector<void*>> cc=dynamic_pointer_cast<ParserCollector<void*>>(mCollector);
if (cc){
cc->invoke(parent, mChild->realize(input));
}
}else{
string value=input.substr(mBegin, mCount);
shared_ptr<ParserCollector<const string&>> cc1=dynamic_pointer_cast<ParserCollector<const string&>>(mCollector);
if (cc1){
cc1->invoke(parent, value);
return;
}
shared_ptr<ParserCollector<const char*>> cc2=dynamic_pointer_cast<ParserCollector<const char*>>(mCollector);
if (cc2){
cc2->invoke(parent, value.c_str());
return;
}
shared_ptr<ParserCollector<int>> cc3=dynamic_pointer_cast<ParserCollector<int>>(mCollector);
if (cc3){
cc3->invoke(parent, atoi(value.c_str()));
return;
}
}
}
/* Binds this context to the parser it is created for. */
ParserContext::ParserContext(Parser &parser)
	: mParser(parser)
{
}
shared_ptr<HandlerContext> ParserContext::beginParse(const shared_ptr<Recognizer> &rec){
shared_ptr<HandlerContext> ctx;
auto it=mParser->mHandlers.find(rec->getName());
if (it!=mParser->mHandlers.end()){
auto it=mParser.mHandlers.find(rec->getName());
if (it!=mParser.mHandlers.end()){
ctx=(*it).second->createContext();
mHandlerStack.push_back(ctx);
}
......@@ -22,37 +49,69 @@ shared_ptr<HandlerContext> ParserContext::beginParse(const shared_ptr<Recognizer
void ParserContext::endParse(const shared_ptr<Recognizer> &rec, const shared_ptr<HandlerContext> &ctx, const string &input, size_t begin, size_t count){
if (ctx){
/*assign object to parent */
shared_ptr<HandlerContext> current=mHandlerStack.back();
mHandlerStack.pop_back();
if (!mHandlerStack.empty()){
mHandlerStack.back()->invoke(rec->getName(),current->getObj());
}
}
if (!mHandlerStack.empty()){
/*assign object to parent */
mHandlerStack.back()->setChild(rec->getName(), begin, count, ctx);
}else{
//no specific handler for this rule, check for a collector from parent
if (!mHandlerStack.empty()){
shared_ptr<HandlerContext> hctx=mHandlerStack.back();
hctx->invoke(rec->getName(), input.substr(begin, count));
}
/*no parent, this is our root object*/
mRoot=ctx;
}
}
/* Realizes the root handler context against 'input'; returns NULL when no root was recorded. */
void *ParserContext::createRootObject(const string &input){
	if (!mRoot) return NULL;
	return mRoot->realize(input);
}
/*
 * Opens a branch of the handler context currently on top of the stack and
 * pushes it, so that what is collected while trying an alternative can later
 * be merged or discarded.
 *
 * Returns the new branch, or a null pointer when the stack is empty (no handler
 * is active, hence there is nothing to collect). merge() and removeBranch()
 * accept that null pointer and do nothing with it.
 */
shared_ptr<HandlerContext> ParserContext::branch(){
	/* Calling back() on an empty list is undefined behaviour. */
	if (mHandlerStack.empty()) return shared_ptr<HandlerContext>();
	shared_ptr<HandlerContext> ret=mHandlerStack.back()->branch();
	mHandlerStack.push_back(ret);
	return ret;
}

/*
 * Pops 'other', which must be the top of the stack, and merges it into the
 * context just below it. A null 'other' (see branch()) is a no-op.
 * Aborts the process if the stack does not have the expected shape.
 */
void ParserContext::merge(const shared_ptr<HandlerContext> &other){
	if (!other) return;
	if (mHandlerStack.empty() || mHandlerStack.back()!=other){
		cerr<<"The branch being merged is not the last one of the stack !"<<endl;
		abort();
	}
	mHandlerStack.pop_back();
	if (mHandlerStack.empty()){
		cerr<<"The branch being merged has no parent in the stack !"<<endl;
		abort();
	}
	mHandlerStack.back()->merge(other);
}

/*
 * Removes 'other' from the stack, searching from the top. A null 'other'
 * (see branch()) is a no-op. Aborts the process if 'other' is not in the stack.
 */
void ParserContext::removeBranch(const shared_ptr<HandlerContext> &other){
	if (!other) return;
	auto it=find(mHandlerStack.rbegin(), mHandlerStack.rend(),other);
	if (it==mHandlerStack.rend()){
		cerr<<"A branch could not be found in the stack while removing it !"<<endl;
		abort();
	}
	/* A reverse iterator's base() designates the element after the one it
	 * refers to, so step once more before converting. */
	advance(it,1);
	mHandlerStack.erase(it.base());
}
shared_ptr<HandlerContext> ParserHandler::createContext(){
return make_shared<HandlerContext>(shared_from_this(), invoke());
return make_shared<HandlerContext>(shared_from_this());
}
/*
 * Keeps a reference to 'grammar' and prints a diagnostic on cerr when the
 * grammar reports that it is not complete. Construction itself is not
 * interrupted in that case.
 */
Parser::Parser(const shared_ptr<Grammar> &grammar)
	: mGrammar(grammar)
{
	const bool complete=mGrammar->isComplete();
	if (!complete) cerr<<"Grammar not complete, aborting."<<endl;
}
void * Parser::parseInput(const string &rulename, const string &input, size_t *parsed_size){
size_t parsed;
shared_ptr<Recognizer> rec=mGrammar->getRule(rulename);
shared_ptr<ParserContext> pctx=make_shared<ParserContext>(shared_from_this());
shared_ptr<ParserContext> pctx=make_shared<ParserContext>(*this);
parsed=rec->feed(pctx, input, 0);
if (parsed_size) *parsed_size=parsed;
return pctx->getRootObject();
return pctx->createRootObject(input);
}
}//end of namespace
\ No newline at end of file
......@@ -28,83 +28,84 @@ class HandlerContext;
class ParserHandler : public enable_shared_from_this<ParserHandler>{
public:
friend class HandlerContext;
ParserHandler(const function<void * ()> &fn) : mHandlerFunc(fn){
ParserHandler(const function<void * ()> &create)
: mHandlerCreateFunc(create){
}
template <typename _valueT>
shared_ptr<ParserHandler> setCollector(const string &child_rule_name, const function<void (void * , const _valueT)> &fn){
shared_ptr<ParserHandler> setCollector(const string &child_rule_name, function<void (void * , _valueT)> fn){
mCollectors[child_rule_name]=make_shared<ParserCollector<_valueT>>(fn);
return shared_from_this();
}
void *invoke(){
return mHandlerFunc();
return mHandlerCreateFunc();
}
shared_ptr<HandlerContext> createContext();
private:
function<void * ()> mHandlerFunc;
function<void * ()> mHandlerCreateFunc;
map<string, shared_ptr<CollectorBase> > mCollectors;
};
/*
 * A pending collector call: stores the collector to use, the begin/count pair
 * of the matched span, and the optional child handler context. invoke()
 * carries it out against a parent object and the parsed input.
 */
class Assignment{
public:
	Assignment(const shared_ptr<CollectorBase> &c, size_t begin, size_t count, const shared_ptr<HandlerContext> &child)
		: mCollector(c)
		, mBegin(begin)
		, mCount(count)
		, mChild(child)
	{
	}
	void invoke(void *parent, const string &input);
private:
	shared_ptr<CollectorBase> mCollector;
	size_t mBegin;
	size_t mCount;
	shared_ptr<HandlerContext> mChild;
};
class HandlerContext{
public:
HandlerContext(const shared_ptr<ParserHandler> &handler, void *obj) :
mHandler(handler), mObj(obj){
HandlerContext(const shared_ptr<ParserHandler> &handler) :
mHandler(handler){
}
void invoke(const string &subrule_name, void *subobj){
void setChild(const string &subrule_name, size_t begin, size_t count, const shared_ptr<HandlerContext> &child){
auto it=mHandler->mCollectors.find(subrule_name);
if (it!=mHandler->mCollectors.end()){
shared_ptr<CollectorBase> c=(*it).second;
shared_ptr<ParserCollector<void*>> cc=dynamic_pointer_cast<ParserCollector<void*>>(c);
if (cc){
cc->invoke(mObj, subobj);
}
mAssignments.push_back(Assignment((*it).second, begin, count, child));
}
}
void invoke(const string &subrule_name, const string &value){
auto it=mHandler->mCollectors.find(subrule_name);
if (it!=mHandler->mCollectors.end()){
shared_ptr<CollectorBase> c=(*it).second;
shared_ptr<ParserCollector<const string&>> cc1=dynamic_pointer_cast<ParserCollector<const string&>>(c);
if (cc1){
cc1->invoke(mObj, value);
return;
}
shared_ptr<ParserCollector<const char*>> cc2=dynamic_pointer_cast<ParserCollector<const char*>>(c);
if (cc2){
cc2->invoke(mObj, value.c_str());
return;
}
shared_ptr<ParserCollector<int>> cc3=dynamic_pointer_cast<ParserCollector<int>>(c);
if (cc3){
cc3->invoke(mObj, atoi(value.c_str()));
return;
}
void *realize(const string &input){
void *ret=mHandler->invoke();
for (auto it=mAssignments.begin(); it!=mAssignments.end(); ++it){
(*it).invoke(ret,input);
}
return ret;
}
void *getObj()const{
return mObj;
shared_ptr<HandlerContext> branch(){
return make_shared<HandlerContext>(mHandler);
}
void merge(const shared_ptr<HandlerContext> &other){
mAssignments.splice(mAssignments.begin(), other->mAssignments);
}
private:
shared_ptr<ParserHandler> mHandler;
void *mObj;
list<Assignment> mAssignments;
};
class Parser;
class ParserContext{
public:
ParserContext(const shared_ptr<Parser> &parser);
ParserContext(Parser &parser);
shared_ptr<HandlerContext> beginParse(const shared_ptr<Recognizer> &rec);
void endParse(const shared_ptr<Recognizer> &rec, const shared_ptr<HandlerContext> &ctx, const string &input, size_t begin, size_t count);
void *getRootObject()const{
return mRootObject;
}
shared_ptr<HandlerContext> branch();
void merge(const shared_ptr<HandlerContext> &other);
void removeBranch(const shared_ptr<HandlerContext> &other);
void *createRootObject(const string &input);
private:
shared_ptr<Parser> mParser;
Parser & mParser;
list<shared_ptr<HandlerContext>> mHandlerStack;
void *mRootObject;
shared_ptr<HandlerContext> mRoot;
};
class Parser : enable_shared_from_this<Parser>{
class Parser{
friend class ParserContext;
public:
Parser(const shared_ptr<Grammar> &grammar);
......@@ -119,53 +120,7 @@ private:
shared_ptr<Grammar> mGrammar;
map<string, shared_ptr<ParserHandler>> mHandlers;
};
#if 0
class CollectorBase{
public:
};
template <typename _ParserElementT, typename _valueT>
class ParserCollector : public CollectorBase{
public:
ParserCollector(const function<void (_ParserElementT, const _valueT)> &fn) : mFunc(fn){
}
function<void (_ParserElementT, const _valueT)> mFunc;
};
class ParserHandlerBase : public enable_shared_from_this<ParserHandlerBase>{
public:
};
template <typename _ElementT>
class ParserHandler : public ParserHandlerBase{
public:
ParserHandler(const function<_ParserElementT ()> &fn) : mHandlerFunc(fn){
}
shared_ptr<ParserHandler<_ElementT>> setCollector(const string &child_rule_name, const shared_ptr<ParserCollector<_ElementT>> & collector);
private:
function<_ParserElementT ()> mHandlerFunc;
map<string, shared_ptr<CollectorBase> > mCollectors;
};
class ParserBase{
public:
ParserBase(const shared_ptr<Grammar> &grammar);
private:
map<string,shared_ptr<ParserHandler>> mHandlers;
};
class Parser : public ParserBase{
public:
Parser(const shared_ptr<Grammar> &grammar);
template <typename _ElementT>
shared_ptr<ParserHandler<_ElementT>> setHandler(const string &rulename, function<_ElementT ()> handler);
template <typename _ElementT>
_ElementT parseInput(const string &rulename, const string &input, size_t *parsed_size);
};
#endif
}
......
#include "abnf.hh"
#include "parser.hh"
#include "grammarbuilder.hh"
#include <iostream>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
using namespace::belr;
int main(int argc, char *argv[]){
const char *grammarfile;
int fd;
struct stat sb;
char *grammar;
shared_ptr<Parser> parser;
if (argc<2){
cerr<<argv[0]<< "grammarfile-to-load"<<endl;
return -1;
}
grammarfile=argv[1];
if (stat(grammarfile,&sb)==-1){
cerr<<"Could not stat "<<grammarfile<<endl;
return -1;
}
fd=open(grammarfile,O_RDONLY);
grammar=new char[sb.st_size+1];
grammar[sb.st_size]='\0';
if (read(fd,grammar,sb.st_size)!=sb.st_size){
cerr<<"Could not read "<<grammarfile<<endl;
return -1;
}
cout<<"Building ABNF recognizer"<<endl;
shared_ptr<ABNFGrammar> abnf_grammar=make_shared<ABNFGrammar>();
if (!abnf_grammar->isComplete()){
cerr<<"ABNF Grammar not complete, aborting."<<endl;
cerr<<argv[0]<< " <grammarfile-to-load>"<<endl;
return -1;
}
parser=make_shared<Parser>(abnf_grammar);
cout<<"Finished ABNF recognizer construction, starting parsing"<<endl;
string sgrammar(grammar);
size_t parsed;
parser->parseInput("rulelist",sgrammar,&parsed);
cout<<"parsing done"<<endl;