Commit 6912b9fc authored by Simon Morlat

work in progress

parent c7d44a0d
......@@ -2,5 +2,6 @@
lib_LTLIBRARIES=libbelr.la
libbelr_la_SOURCES=belr.cc belr.hh \
abnf.cc abnf.hh
abnf.cc abnf.hh \
parser.cc parser.hh
#include "belr.hh"
#include "parser.hh"
#include <algorithm>
#include <iostream>
......@@ -13,12 +14,23 @@ void Recognizer::setName(const string& name){
mName=name;
}
size_t Recognizer::feed(const string &input, size_t pos){
size_t match=_feed(input, pos);
if (match!=string::npos && match>0 && mName.size()>0){
string matched=input.substr(pos,match);
cout<<"Matched recognizer '"<<mName<<"' with sequence '"<<matched<<"'."<<endl;
/* Gives read-only access to the name stored in mName (set through setName()). */
const string &Recognizer::getName()const{
    return this->mName;
}
size_t Recognizer::feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
size_t match;
shared_ptr<HandlerContext> hctx=ctx->beginParse(shared_from_this());
match=_feed(ctx, input, pos);
if (match!=string::npos && match>0){
if (0 && mName.size()>0){
string matched=input.substr(pos,match);
cout<<"Matched recognizer '"<<mName<<"' with sequence '"<<matched<<"'."<<endl;
}
}
ctx->endParse(shared_from_this(), hctx, input, pos, match);
return match;
}
......@@ -31,7 +43,7 @@ CharRecognizer::CharRecognizer(int to_recognize, bool caseSensitive) : mToRecogn
}
}
size_t CharRecognizer::_feed(const string &input, size_t pos){
size_t CharRecognizer::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
if (mCaseSensitive){
return input[pos]==mToRecognize ? 1 : string::npos;
}
......@@ -43,20 +55,26 @@ Selector::Selector(){
shared_ptr<Selector> Selector::addRecognizer(const shared_ptr<Recognizer> &r){
mElements.push_back(r);
return shared_from_this();
return static_pointer_cast<Selector> (shared_from_this());
}
size_t Selector::_feed(const string &input, size_t pos){
size_t Selector::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
size_t matched=0;
size_t bestmatch=0;
shared_ptr<ParserContext> bestCtx;
for (auto it=mElements.begin(); it!=mElements.end(); ++it){
matched=(*it)->feed(input, pos);
shared_ptr<ParserContext> currentCtx=make_shared<ParserContext>();
matched=(*it)->feed(currentCtx, input, pos);
if (matched!=string::npos && matched>bestmatch) {
bestmatch=matched;
bestCtx=currentCtx;
}
}
if (bestmatch==0) return string::npos;
if (bestmatch!=string::npos){
ctx->push(bestCtx);
}
return bestmatch;
}
......@@ -65,15 +83,15 @@ Sequence::Sequence(){
shared_ptr<Sequence> Sequence::addRecognizer(const shared_ptr<Recognizer> &element){
mElements.push_back(element);
return shared_from_this();
return static_pointer_cast<Sequence>( shared_from_this());
}
size_t Sequence::_feed(const string &input, size_t pos){
size_t Sequence::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
size_t matched=0;
size_t total=0;
for (auto it=mElements.begin(); it!=mElements.end(); ++it){
matched=(*it)->feed(input, pos);
matched=(*it)->feed(ctx, input, pos);
if (matched==string::npos){
return string::npos;
}
......@@ -92,16 +110,16 @@ shared_ptr<Loop> Loop::setRecognizer(const shared_ptr<Recognizer> &element, int
mMin=min;
mMax=max;
mRecognizer=element;
return shared_from_this();
return static_pointer_cast<Loop>(shared_from_this());
}
size_t Loop::_feed(const string &input, size_t pos){
size_t Loop::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
size_t matched=0;
size_t total=0;
int repeat;
for(repeat=0;mMax!=-1 ? repeat<mMax : true;repeat++){
matched=mRecognizer->feed(input,pos);
matched=mRecognizer->feed(ctx, input, pos);
if (matched==string::npos) break;
total+=matched;
pos+=matched;
......@@ -152,9 +170,9 @@ shared_ptr<Recognizer> RecognizerPointer::getPointed(){
return mRecognizer;
}
size_t RecognizerPointer::_feed(const string &input, size_t pos){
size_t RecognizerPointer::_feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos){
if (mRecognizer){
return mRecognizer->feed(input,pos);
return mRecognizer->feed(ctx, input, pos);
}else{
cerr<<"RecognizerPointer is undefined"<<endl;
abort();
......@@ -183,9 +201,9 @@ void Grammar::assignRule(const string &argname, const shared_ptr<Recognizer> &ru
cerr<<"Error: rule '"<<name<<"' is being redefined !"<<endl;
abort();
}
}else{
mRules[name]=rule;
}
/*in any case the map should contain real recognizers (not just pointers) */
mRules[name]=rule;
}
shared_ptr<Recognizer> Grammar::getRule(const string &argname){
......@@ -236,4 +254,29 @@ string tolower(const string &str){
return ret;
}
/* Records one parsing event: the recognizer involved plus the 'begin' and 'count' values given by the caller. */
ParserContext::Element::Element(const shared_ptr<Recognizer> &recognizer, size_t begin, size_t count)
    : mRecognizer(recognizer),
      mBegin(begin),
      mCount(count){
}
/* Default construction: nothing to set up beyond the members' own default constructors. */
ParserContext::ParserContext() = default;
void ParserContext::addParsingEvent(const shared_ptr<Recognizer>& recognizer, size_t begin, size_t end){
mEvents.push_back(Element(recognizer, begin, end));
}
/*
 * Moves every event held by 'ctx' to the end of this context's event list.
 * The elements are spliced, not copied, so ctx's list is empty afterwards.
 * 'ctx' is dereferenced unconditionally and therefore must not be null.
 */
void ParserContext::push(const shared_ptr< ParserContext >& ctx){
    auto &incoming=ctx->mEvents;
    mEvents.splice(mEvents.end(), incoming);
}
/* Read-only view of the events accumulated so far. */
const list<ParserContext::Element> &ParserContext::getEvents()const{
    return this->mEvents;
}
/* Number of events currently stored in mEvents. */
size_t ParserContext::size()const{
    const size_t eventCount=mEvents.size();
    return eventCount;
}
}
#ifndef belr_hh
#define belr_hh
#include <list>
#include <map>
......@@ -9,13 +12,16 @@ namespace belr{
string tolower(const string &str);
class Recognizer{
class ParserContext;
class Recognizer : public enable_shared_from_this<Recognizer>{
public:
void setName(const string &name);
size_t feed(const string &input, size_t pos);
const string &getName()const;
size_t feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
protected:
Recognizer();
virtual size_t _feed(const string &input, size_t pos)=0;
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos)=0;
string mName;
};
......@@ -23,35 +29,35 @@ class CharRecognizer : public Recognizer{
public:
CharRecognizer(int to_recognize, bool caseSensitive=false);
private:
virtual size_t _feed(const string &input, size_t pos);
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
int mToRecognize;
bool mCaseSensitive;
};
class Selector : public Recognizer, public enable_shared_from_this<Selector>{
class Selector : public Recognizer{
public:
Selector();
shared_ptr<Selector> addRecognizer(const shared_ptr<Recognizer> &element);
private:
virtual size_t _feed(const string &input, size_t pos);
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
list<shared_ptr<Recognizer>> mElements;
};
class Sequence : public Recognizer, public enable_shared_from_this<Sequence>{
class Sequence : public Recognizer{
public:
Sequence();
shared_ptr<Sequence> addRecognizer(const shared_ptr<Recognizer> &element);
private:
virtual size_t _feed(const string &input, size_t pos);
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
list<shared_ptr<Recognizer>> mElements;
};
class Loop : public Recognizer, public enable_shared_from_this<Loop>{
class Loop : public Recognizer{
public:
Loop();
shared_ptr<Loop> setRecognizer(const shared_ptr<Recognizer> &element, int min=0, int max=-1);
private:
virtual size_t _feed(const string &input, size_t pos);
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
shared_ptr<Recognizer> mRecognizer;
int mMin, mMax;
};
......@@ -77,7 +83,7 @@ public:
shared_ptr<Recognizer> getPointed();
void setPointed(const shared_ptr<Recognizer> &r);
private:
virtual size_t _feed(const string &input, size_t pos);
virtual size_t _feed(const shared_ptr<ParserContext> &ctx, const string &input, size_t pos);
shared_ptr<Recognizer> mRecognizer;
};
......@@ -85,6 +91,9 @@ class Grammar{
public:
Grammar(const string &name);
void include(const shared_ptr<Grammar>& grammar);
/* the grammar takes ownership of the recognizer, which must not be used outside of this grammar.
* TODO: use unique_ptr to enforce this, or make a copy ?
**/
template <typename _recognizerT>
shared_ptr<_recognizerT> addRule(const string & name, const shared_ptr<_recognizerT> &rule){
assignRule(name, rule);
......@@ -98,4 +107,9 @@ private:
string mName;
};
}
#endif
#include <parser.hh>
#include <iostream>
namespace belr{
/* Out-of-line definition of the virtual destructor declared in parser.hh; it has no work to do. */
CollectorBase::~CollectorBase() = default;
/* Binds the context to the parser whose handlers it will look up; the root object starts out null. */
ParserContext::ParserContext(const shared_ptr<Parser> &parser)
    : mParser(parser),
      mRootObject(nullptr){
}
/*
 * Called before a recognizer is fed.
 * Looks up, in the parser's handler map, a handler registered under the recognizer's name.
 * When one exists, a fresh HandlerContext is created from it, pushed on the handler stack
 * and returned; otherwise an empty pointer is returned and the stack is left untouched.
 */
shared_ptr<HandlerContext> ParserContext::beginParse(const shared_ptr<Recognizer> &rec){
    const auto found=mParser->mHandlers.find(rec->getName());
    if (found==mParser->mHandlers.end()){
        return shared_ptr<HandlerContext>();
    }
    shared_ptr<HandlerContext> created=found->second->createContext();
    mHandlerStack.push_back(created);
    return created;
}
void ParserContext::endParse(const shared_ptr<Recognizer> &rec, const shared_ptr<HandlerContext> &ctx, const string &input, size_t begin, size_t count){
if (ctx){
/*assign object to parent */
shared_ptr<HandlerContext> current=mHandlerStack.back();
mHandlerStack.pop_back();
if (!mHandlerStack.empty()){
mHandlerStack.back()->invoke(rec->getName(),current->getObj());
}
}else{
//no specific handler for this rule, check for a collector from parent
if (!mHandlerStack.empty()){
shared_ptr<HandlerContext> hctx=mHandlerStack.back();
hctx->invoke(rec->getName(), input.substr(begin, count));
}
}
}
/*
 * Runs the handler function to obtain a new object, then wraps that object and this
 * handler in a newly allocated HandlerContext.
 */
shared_ptr<HandlerContext> ParserHandler::createContext(){
    void *createdObj=invoke();
    return make_shared<HandlerContext>(shared_from_this(), createdObj);
}
/* Keeps a reference to the grammar whose rules parseInput() will look up. */
Parser::Parser(const shared_ptr<Grammar> &grammar)
    : mGrammar(grammar){
}
void * Parser::parseInput(const string &rulename, const string &input, size_t *parsed_size){
size_t parsed;
shared_ptr<Recognizer> rec=mGrammar->getRule(rulename);
shared_ptr<ParserContext> pctx=make_shared<ParserContext>(shared_from_this());
parsed=rec->feed(pctx, input, 0);
if (parsed_size) *parsed_size=parsed;
return pctx->getRootObject();
}
}//end of namespace
\ No newline at end of file
#ifndef parser_hh
#define parser_hh
#include <functional>
#include "belr.hh"
namespace belr{
/*
 * Common polymorphic base of all ParserCollector<T> instantiations, so that collectors
 * of different value types can be stored together and recovered with dynamic_pointer_cast.
 */
class CollectorBase{
    public:
        /* virtual so that the class is polymorphic; defined in parser.cc */
        virtual ~CollectorBase();
};
/*
 * Collector holding a callback of the form void(void *obj, _valueT value).
 * invoke() simply forwards its two arguments to that callback.
 */
template <typename _valueT>
class ParserCollector : public CollectorBase{
public:
    ParserCollector(const function<void (void *, _valueT)> &fn)
        : mFunc(fn){
    }
    /* Calls the stored callback with the target object and the collected value. */
    void invoke(void *obj, _valueT value){
        mFunc(obj, value);
    }
    function<void (void *, _valueT)> mFunc;
};
class HandlerContext;
/*
 * Handler attached to a grammar rule: it owns the function creating the object for that
 * rule, plus a map of collectors keyed by child rule name.
 * HandlerContext is a friend so that it can read mCollectors directly.
 */
class ParserHandler : public enable_shared_from_this<ParserHandler>{
public:
    friend class HandlerContext;

    ParserHandler(const function<void * ()> &fn)
        : mHandlerFunc(fn){
    }
    /* Calls the creation function and returns the pointer it produced. */
    void *invoke(){
        return mHandlerFunc();
    }
    /* Defined in parser.cc. */
    shared_ptr<HandlerContext> createContext();
    /*
     * Registers (or replaces) the collector used for the child rule 'child_rule_name'.
     * Returns this handler so that calls can be chained.
     */
    template <typename _valueT>
    shared_ptr<ParserHandler> setCollector(const string &child_rule_name, const function<void (void * , const _valueT)> &fn){
        shared_ptr<CollectorBase> collector=make_shared<ParserCollector<_valueT>>(fn);
        mCollectors[child_rule_name]=collector;
        return shared_from_this();
    }
private:
    function<void * ()> mHandlerFunc;
    map<string, shared_ptr<CollectorBase> > mCollectors;
};
class HandlerContext{
public:
HandlerContext(const shared_ptr<ParserHandler> &handler, void *obj) :
mHandler(handler), mObj(obj){
}
void invoke(const string &subrule_name, void *subobj){
auto it=mHandler->mCollectors.find(subrule_name);
if (it!=mHandler->mCollectors.end()){
shared_ptr<CollectorBase> c=(*it).second;
shared_ptr<ParserCollector<void*>> cc=dynamic_pointer_cast<ParserCollector<void*>>(c);
if (cc){
cc->invoke(mObj, subobj);
}
}
}
void invoke(const string &subrule_name, const string &value){
auto it=mHandler->mCollectors.find(subrule_name);
if (it!=mHandler->mCollectors.end()){
shared_ptr<CollectorBase> c=(*it).second;
shared_ptr<ParserCollector<const string&>> cc1=dynamic_pointer_cast<ParserCollector<const string&>>(c);
if (cc1){
cc1->invoke(mObj, value);
return;
}
shared_ptr<ParserCollector<const char*>> cc2=dynamic_pointer_cast<ParserCollector<const char*>>(c);
if (cc2){
cc2->invoke(mObj, value.c_str());
return;
}
shared_ptr<ParserCollector<int>> cc3=dynamic_pointer_cast<ParserCollector<int>>(c);
if (cc3){
cc3->invoke(mObj, atoi(value.c_str()));
return;
}
}
}
void *getObj()const{
return mObj;
}
private:
shared_ptr<ParserHandler> mHandler;
void *mObj;
};
class Parser;
/*
 * State of one parsing run: the parser it belongs to, the stack of handler contexts
 * for the rules currently being parsed, and a root object pointer.
 */
class ParserContext{
public:
    ParserContext(const shared_ptr<Parser> &parser);

    /* Root object pointer held by this context. */
    void *getRootObject()const{
        return mRootObject;
    }

    /* Notifications issued around the feeding of a recognizer; both are defined in parser.cc. */
    shared_ptr<HandlerContext> beginParse(const shared_ptr<Recognizer> &rec);
    void endParse(const shared_ptr<Recognizer> &rec, const shared_ptr<HandlerContext> &ctx, const string &input, size_t begin, size_t count);
private:
    shared_ptr<Parser> mParser;
    list<shared_ptr<HandlerContext>> mHandlerStack;
    void *mRootObject;
};
/*
 * Parser built on top of a Grammar: handlers are registered per rule name with
 * setHandler(), then parseInput() runs a rule of the grammar over an input string.
 *
 * The enable_shared_from_this base MUST be public: parseInput() calls
 * shared_from_this(), and shared_ptr/make_shared only initialise the internal weak
 * reference when the base is publicly accessible. With the default (private)
 * inheritance of 'class', shared_from_this() fails with bad_weak_ptr.
 * A Parser must therefore also be owned by a shared_ptr (e.g. created with make_shared).
 */
class Parser : public enable_shared_from_this<Parser>{
    friend class ParserContext; /* ParserContext reads mHandlers */
public:
    Parser(const shared_ptr<Grammar> &grammar);
    /*
     * Registers (or replaces) the handler for rule 'rulename'. 'handler' is the function
     * creating the object for that rule. The new ParserHandler is returned so that
     * collectors can be attached to it.
     */
    shared_ptr<ParserHandler> setHandler(const string &rulename, function<void* ()> handler){
        shared_ptr<ParserHandler> ret=make_shared<ParserHandler>(handler);
        mHandlers[rulename]=ret;
        return ret;
    }
    /* Defined in parser.cc. */
    void * parseInput(const string &rulename, const string &input, size_t *parsed_size);
private:
    shared_ptr<Grammar> mGrammar;
    map<string, shared_ptr<ParserHandler>> mHandlers;
};
/*
 * Everything between this '#if 0' and the matching '#endif' is compiled out.
 * It is a draft of a templated variant of the classes declared above (collectors and
 * handlers parameterised on the element type instead of using void*).
 * NOTE(review): the draft is not self-consistent as written - ParserHandler<_ElementT>
 * refers to _ParserElementT, ParserCollector is used with a single template argument,
 * and ParserBase stores ParserHandler without template arguments - so it would need
 * rework before being enabled.
 */
#if 0
class CollectorBase{
public:
};
template <typename _ParserElementT, typename _valueT>
class ParserCollector : public CollectorBase{
public:
ParserCollector(const function<void (_ParserElementT, const _valueT)> &fn) : mFunc(fn){
}
function<void (_ParserElementT, const _valueT)> mFunc;
};
class ParserHandlerBase : public enable_shared_from_this<ParserHandlerBase>{
public:
};
template <typename _ElementT>
class ParserHandler : public ParserHandlerBase{
public:
ParserHandler(const function<_ParserElementT ()> &fn) : mHandlerFunc(fn){
}
shared_ptr<ParserHandler<_ElementT>> setCollector(const string &child_rule_name, const shared_ptr<ParserCollector<_ElementT>> & collector);
private:
function<_ParserElementT ()> mHandlerFunc;
map<string, shared_ptr<CollectorBase> > mCollectors;
};
class ParserBase{
public:
ParserBase(const shared_ptr<Grammar> &grammar);
private:
map<string,shared_ptr<ParserHandler>> mHandlers;
};
class Parser : public ParserBase{
public:
Parser(const shared_ptr<Grammar> &grammar);
template <typename _ElementT>
shared_ptr<ParserHandler<_ElementT>> setHandler(const string &rulename, function<_ElementT ()> handler);
template <typename _ElementT>
_ElementT parseInput(const string &rulename, const string &input, size_t *parsed_size);
};
#endif
}
#endif
......@@ -2,6 +2,7 @@
#include "abnf.hh"
#include "parser.hh"
#include <iostream>
#include <sys/types.h>
#include <sys/stat.h>
......@@ -16,7 +17,7 @@ int main(int argc, char *argv[]){
int fd;
struct stat sb;
char *grammar;
shared_ptr<Recognizer> parser;
shared_ptr<Parser> parser;
if (argc<2){
cerr<<argv[0]<< "grammarfile-to-load"<<endl;
......@@ -35,15 +36,16 @@ int main(int argc, char *argv[]){
return -1;
}
cout<<"Building ABNF recognizer"<<endl;
ABNFGrammar abnf_grammar;
if (!abnf_grammar.isComplete()){
shared_ptr<ABNFGrammar> abnf_grammar=make_shared<ABNFGrammar>();
if (!abnf_grammar->isComplete()){
cerr<<"ABNF Grammar not complete, aborting."<<endl;
return -1;
}
parser=abnf_grammar.getRule("rulelist");
parser=make_shared<Parser>(abnf_grammar);
cout<<"Finished ABNF recognizer construction, starting parsing"<<endl;
string sgrammar(grammar);
parser->feed(sgrammar,0);
size_t parsed;
parser->parseInput("rulelist",sgrammar,&parsed);
cout<<"parsing done"<<endl;
delete []grammar;
return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment