Commit 43333c36 authored by Simon Morlat's avatar Simon Morlat

grammar serialization and reading works. Rule aliasing bug resolved.

parent b2ad7a49
......@@ -23,7 +23,7 @@
#include <map>
#include <memory>
#include <string>
#include <fstream>
// =============================================================================
......@@ -46,7 +46,12 @@ namespace belr{
BELR_PUBLIC std::string tolower(const std::string &str);
class ParserContextBase;
class BinaryOutputStream;
class BinaryGrammarBuilder;
/**
* The transition map is an internal tool used to optimize recognizers
**/
struct TransitionMap{
TransitionMap();
bool intersect(const TransitionMap *other);
......@@ -69,11 +74,12 @@ public:
bool getTransitionMap(TransitionMap *mask);
void optimize();
void optimize(int recursionLevel);
void serialize(std::ofstream &fstr);
void serialize(BinaryOutputStream &fstr, bool topLevel=false);
static std::shared_ptr<Recognizer> build(BinaryGrammarBuilder &ifstr);
protected:
Recognizer() = default;
virtual void _serialize(std::ofstream &fstr) = 0;
void writeInt(std::ofstream &fstr, int number);
Recognizer(BinaryGrammarBuilder &istr);
virtual void _serialize(BinaryOutputStream &fstr) = 0;
/*returns true if the transition map is complete, false otherwise*/
virtual bool _getTransitionMap(TransitionMap *mask);
virtual void _optimize(int recursionLevel)=0;
......@@ -91,17 +97,19 @@ enum RecognizerTypeId{
LoopId,
CharRangeId,
LiteralId,
PointerId
PointerId,
AliasId,
RuleRefId
};
class CharRecognizer : public Recognizer{
public:
CharRecognizer(int to_recognize, bool caseSensitive=false);
CharRecognizer(BinaryGrammarBuilder &istr);
private:
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
void _optimize(int recursionLevel) override;
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
int mToRecognize;
bool mCaseSensitive;
......@@ -110,12 +118,13 @@ private:
class Selector : public Recognizer{
public:
std::shared_ptr<Selector> addRecognizer(const std::shared_ptr<Recognizer> &element);
Selector(bool isExclusive = false);
Selector(BinaryGrammarBuilder &istr);
protected:
void _optimize(int recursionLevel) override;
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
bool _getTransitionMap(TransitionMap *mask) override;
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
size_t _feedExclusive(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos);
......@@ -125,18 +134,22 @@ protected:
/**
* ExclusiveSelector is a specialization of Selector for the case where there can be only a single match.
* It overrides _feed() only; everything else is inherited from Selector.
**/
class ExclusiveSelector : public Selector{
public:
ExclusiveSelector();
// NOTE(review): presumably builds the selector from a compiled (binary) grammar stream - confirm in belr.cpp.
ExclusiveSelector(BinaryGrammarBuilder &istr);
private:
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
};
class Sequence : public Recognizer{
public:
Sequence() = default;
Sequence(BinaryGrammarBuilder &istr);
bool _getTransitionMap(TransitionMap *mask) override;
std::shared_ptr<Sequence> addRecognizer(const std::shared_ptr<Recognizer> &element);
protected:
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
void _optimize(int recursionLevel) override;
private:
......@@ -147,12 +160,13 @@ private:
class Loop : public Recognizer{
public:
Loop() = default;
Loop(BinaryGrammarBuilder &istr);
bool _getTransitionMap(TransitionMap *mask) override;
std::shared_ptr<Loop> setRecognizer(const std::shared_ptr<Recognizer> &element, int min=0, int max=-1);
protected:
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
void _optimize(int recursionLevel) override;
private:
......@@ -176,9 +190,9 @@ public:
class CharRange : public Recognizer{
public:
CharRange(int begin, int end);
CharRange(BinaryGrammarBuilder &istr);
private:
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
void _optimize(int recursionLevel) override;
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
......@@ -189,12 +203,12 @@ private:
class Literal : public Recognizer{
public:
Literal(const std::string &lit);
Literal(BinaryGrammarBuilder &istr);
bool _getTransitionMap(TransitionMap *mask) override;
private:
void _optimize(int recursionLevel) override;
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
std::string mLiteral;
......@@ -207,19 +221,47 @@ public:
static std::shared_ptr<Recognizer> char_range(int begin, int end);
};
/**
* The RecognizerPointer just points to another recognizer and delegates everything to the pointed recognizer.
* It is a placeholder used when a not-yet-defined rule appears while parsing an ABNF grammar.
* The pointed recognizer is set to the rule once that rule is defined.
**/
class RecognizerPointer : public Recognizer{
public:
RecognizerPointer() = default;
// NOTE(review): the constructor taking a BinaryGrammarBuilder is deliberately commented out here,
// unlike the other recognizers - confirm how pointers are restored when loading a binary grammar.
//RecognizerPointer(BinaryGrammarBuilder &istr);
/* Returns the recognizer currently pointed to. */
std::shared_ptr<Recognizer> getPointed();
/* Sets the recognizer to which everything is delegated. */
void setPointed(const std::shared_ptr<Recognizer> &r);
private:
void _optimize(int recursionLevel) override;
// NOTE(review): both a std::ofstream and a BinaryOutputStream overload of _serialize() are declared below;
// this looks like the pre-change and post-change lines of the same declaration - confirm which one is current.
virtual void _serialize(std::ofstream &fstr) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
/* The pointed recognizer. */
std::shared_ptr<Recognizer> mRecognizer;
};
/**
* The RecognizerAlias points to another recognizer and delegates everything to it. It is necessary to represent
* ABNF statements like
* rule2 = rule1
* Its purpose differs from that of the RecognizerPointer, although it behaves exactly the same way.
* A distinct type is necessary to distinguish between the two usages.
**/
class RecognizerAlias : public Recognizer{
public:
RecognizerAlias() = default;
// NOTE(review): presumably builds the alias from a compiled (binary) grammar stream - confirm in belr.cpp.
RecognizerAlias(BinaryGrammarBuilder &istr);
/* Returns the recognizer this alias stands for. */
std::shared_ptr<Recognizer> getPointed();
/* Sets the recognizer this alias stands for. */
void setPointed(const std::shared_ptr<Recognizer> &r);
private:
void _optimize(int recursionLevel) override;
virtual void _serialize(BinaryOutputStream &fstr) override;
size_t _feed(const std::shared_ptr<ParserContextBase> &ctx, const std::string &input, size_t pos) override;
/* The aliased recognizer. */
std::shared_ptr<Recognizer> mRecognizer;
};
/**
* Grammar class represents an ABNF grammar, with all its rules.
**/
......@@ -287,19 +329,25 @@ public:
* A selector is said exclusive when a single sub-rule can match. Knowing this in advance optimizes the processing because no branch
* context is to be created to explore the different choices of the selector recognizer.
**/
void optimize();
BELR_PUBLIC void optimize();
/**
* Return the number of rules in this grammar.
**/
int getNumRules()const;
BELR_PUBLIC int getNumRules()const;
/**
* Save the grammar into a binary file.
**/
int save(const std::string &filename);
BELR_PUBLIC int save(const std::string &filename);
/**
* Load the grammar from a binary file
**/
BELR_PUBLIC int load(const std::string &filename);
private:
void assignRule(const std::string &name, const std::shared_ptr<Recognizer> &rule);
void _extendRule(const std::string &name, const std::shared_ptr<Recognizer> &rule);
std::map<std::string,std::shared_ptr<Recognizer>> mRules;
//The recognizer pointers create loops in the chain of recognizers, preventing the shared_ptr<> from being released.
//We store them in this list so that we can reset them manually to break the reference loops.
std::list<std::shared_ptr<RecognizerPointer>> mRecognizerPointers;
std::string mName;
};
......
......@@ -27,6 +27,7 @@ set(BELR_SOURCE_FILES_CXX
belr.cpp
grammarbuilder.cpp
parser.cpp
binarystream.cpp
)
bc_apply_compile_flags(BELR_SOURCE_FILES_C STRICT_OPTIONS_CPP STRICT_OPTIONS_C)
......
This diff is collapsed.
/*
* Copyright (C) 2017 Belledonne Communications SARL
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "binarystream.h"
#include "belr/belr.h"
#include <netinet/in.h>
using namespace std;
namespace belr{
/**
* The BinaryInputStream is used internally to read grammars from a binary file.
**/
/**
 * Reads sizeof(int) raw bytes from the stream and converts them from network
 * to host byte order. The stream state is not checked here.
 **/
int BinaryInputStream::readInt(){
	int networkValue = 0;
	char *rawBytes = reinterpret_cast<char*>(&networkValue);
	read(rawBytes, sizeof(networkValue));
	const auto hostValue = ntohl(networkValue);
	return hostValue;
}
/**
 * Reads a single byte from the stream and returns it.
 * The stream state is not checked here.
 **/
unsigned char BinaryInputStream::readUChar(){
	unsigned char byte = 0;
	read(reinterpret_cast<char*>(&byte), sizeof(byte));
	return byte;
}
/**
 * Reads a null-terminated string from the stream.
 * Characters are consumed one by one until a null byte is found (it is consumed
 * but not stored) or until a read fails.
 * @return the characters read before the terminator. If the stream ends before a
 * terminator is met, the characters successfully read so far are returned.
 **/
std::string BinaryInputStream::readString(){
	std::string ret;
	char c = '\0';
	// The stream must be tested *after* each read: testing good() beforehand lets a
	// failed read go unnoticed, in which case the previous (or an uninitialized)
	// value of c would be appended to the result.
	while (read(&c, 1)){
		if (c == '\0') break;
		ret.push_back(c);
	}
	return ret;
}
/**
 * Keeps a reference to the grammar in which getRule() performs its lookups.
 **/
BinaryGrammarBuilder::BinaryGrammarBuilder(Grammar &grammar)
	: mGrammar(grammar)
{
}
/**
 * Looks up a rule by name in the grammar this builder was created with.
 * The lookup is entirely delegated to Grammar::getRule().
 **/
std::shared_ptr<Recognizer> BinaryGrammarBuilder::getRule(const string &name){
	std::shared_ptr<Recognizer> rule = mGrammar.getRule(name);
	return rule;
}
/**
 * Converts val to network byte order and writes its sizeof(int) raw bytes to the stream.
 **/
void BinaryOutputStream::writeInt(int val){
	const int networkValue = htonl(val);
	const char *rawBytes = reinterpret_cast<const char*>(&networkValue);
	write(rawBytes, sizeof(networkValue));
}
/**
 * Writes a single byte to the stream.
 **/
void BinaryOutputStream::writeUChar(unsigned char val){
	const char *byte = reinterpret_cast<const char*>(&val);
	write(byte, sizeof(val));
}
/**
 * Writes the characters of val followed by a terminating null byte.
 **/
void BinaryOutputStream::writeString(const string &val){
	// One extra byte so that the null terminator of c_str() is written too.
	const auto lengthWithTerminator = val.size() + 1;
	write(val.c_str(), lengthWithTerminator);
}
}//end of namespace
/*
* Copyright (C) 2017 Belledonne Communications SARL
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef binarystream_h
#define binarystream_h
#include <fstream>
#include <memory>
namespace belr{
class Grammar;
/**
 * BinaryInputStream is a std::ifstream with helpers to decode the primitive
 * values stored in a binary grammar file. It is for internal use.
 **/
class BinaryInputStream : public std::ifstream{
public:
	/** Reads an integer stored in network byte order. */
	int readInt();
	/** Reads one byte. */
	unsigned char readUChar();
	/** Reads a null-terminated string; the terminator is not part of the result. */
	std::string readString();
};
/** Extracts one byte from istr into val, using BinaryInputStream::readUChar(). */
inline BinaryInputStream &operator>>(BinaryInputStream &istr, unsigned char &val){
	const unsigned char byte = istr.readUChar();
	val = byte;
	return istr;
}
/** Extracts one integer from istr into val, using BinaryInputStream::readInt(). */
inline BinaryInputStream &operator>>(BinaryInputStream &istr, int &val){
	const int number = istr.readInt();
	val = number;
	return istr;
}
/** Extracts one integer from istr with BinaryInputStream::readInt() and stores it in val as an unsigned value. */
inline BinaryInputStream &operator>>(BinaryInputStream &istr, unsigned int &val){
	const int number = istr.readInt();
	val = static_cast<unsigned int>(number);
	return istr;
}
/** Extracts one string from istr into val, using BinaryInputStream::readString(). */
inline BinaryInputStream &operator>>(BinaryInputStream &istr, std::string &val){
	val.assign(istr.readString());
	return istr;
}
/**
 * BinaryOutputStream is a std::ofstream with helpers to encode the primitive
 * values written when a grammar is serialized into a binary file. It is for internal use.
 **/
class BinaryOutputStream : public std::ofstream{
public:
	/** Writes an integer in network byte order. */
	void writeInt(int val);
	/** Writes one byte. */
	void writeUChar(unsigned char val);
	/** Writes the string followed by a terminating null byte. */
	void writeString(const std::string &val);
};
/** Inserts one byte into ostr, using BinaryOutputStream::writeUChar(). */
inline BinaryOutputStream &operator<<(BinaryOutputStream &ostr, unsigned char val){
	BinaryOutputStream &out = ostr;
	out.writeUChar(val);
	return out;
}
/** Inserts one integer into ostr, using BinaryOutputStream::writeInt(). */
inline BinaryOutputStream &operator<<(BinaryOutputStream &ostr, int val){
	BinaryOutputStream &out = ostr;
	out.writeInt(val);
	return out;
}
/** Inserts one unsigned integer into ostr; the value is converted to int and written with BinaryOutputStream::writeInt(). */
inline BinaryOutputStream &operator<<(BinaryOutputStream &ostr, unsigned int val){
	const int asSigned = static_cast<int>(val);
	ostr.writeInt(asSigned);
	return ostr;
}
/** Inserts one string into ostr, using BinaryOutputStream::writeString(). */
inline BinaryOutputStream &operator<<(BinaryOutputStream &ostr, const std::string& val){
	BinaryOutputStream &out = ostr;
	out.writeString(val);
	return out;
}
/** Inserts a C string into ostr: it is converted to a std::string and written with BinaryOutputStream::writeString(). */
inline BinaryOutputStream &operator<<(BinaryOutputStream &ostr, const char * val){
	const std::string text(val);
	ostr.writeString(text);
	return ostr;
}
class Recognizer;
/**
 * BinaryGrammarBuilder is a BinaryInputStream that additionally holds a reference
 * to a Grammar, so that rules can be looked up by name through the same object.
 **/
class BinaryGrammarBuilder : public BinaryInputStream{
public:
	/** @param grammar the grammar used by getRule(); only a reference is kept. */
	BinaryGrammarBuilder(Grammar &grammar);

	/** Returns the rule registered under the given name in the grammar. */
	std::shared_ptr<Recognizer> getRule(const std::string &name);

private:
	Grammar &mGrammar;
};
}//end of namespace
#endif
......@@ -262,7 +262,21 @@ shared_ptr<Recognizer> ABNFRuleList::buildRecognizer(const shared_ptr<Grammar> &
if (rule->isExtension()){
grammar->extendRule(rule->getName(), rule->buildRecognizer(grammar));
}else{
grammar->addRule(rule->getName(), rule->buildRecognizer(grammar));
auto rec = rule->buildRecognizer(grammar);
/* Special case: if the returned recognizer is a rule that was already added to the grammar,
* we should not add it a second time, otherwise the name of the recognizer and the name in the grammar entry
* will be different. To solve this problem, we use an intermediary RecognizerAlias. */
if (!rec->getName().empty()){
/*only rules (that is recognizers added to the grammar) have a name defined*/
if (rec->getName() != rule->getName()){
/* we are facing a statement like rule2 = rule1 */
auto alias = make_shared<RecognizerAlias>();
alias->setPointed(rec);
rec = alias;
}
}
grammar->addRule(rule->getName(), rec);
}
}
return nullptr;
......
......@@ -8,6 +8,7 @@
#include "belr/grammarbuilder.h"
#include "belr/abnf.h"
#include "bctoolbox/logging.h"
using namespace belr;
using namespace std;
......@@ -18,8 +19,9 @@ int main(int argc, char *argv[]){
int i;
int repeat_count=1;
if (argc<2){
cerr<<argv[0]<< " [--repeat <count>] <grammarfile-to-load> - test an abnf and instanciate the parser"<<endl;
cerr<<argv[0]<< " [--repeat <count>] <grammarfile-to-load> <input file to parse> <entry rule> [rule1] [rule2]..."<<endl;
cerr<<argv[0]<< " [--repeat <count>] [--debug] <grammar file to load> - test an abnf and instanciate the parser"<<endl;
cerr<<argv[0]<< " [--repeat <count>] [--debug] <grammar file to load> <input file to parse> <entry rule> [rule1] [rule2]..."<<endl;
cerr<<argv[0]<< " The grammar file may be either an ABNF grammar text file, or a compiled grammar generated by belr-compiler tool."<<endl;
return -1;
}
for(i=1;i<argc;++i){
......@@ -28,6 +30,8 @@ int main(int argc, char *argv[]){
if (i<argc){
repeat_count=atoi(argv[i]);
}
}else if (strcmp(argv[i],"--debug")==0){
bctbx_set_log_level(NULL, BCTBX_LOG_DEBUG);
}else{
file=argv[i];
++i;
......@@ -36,12 +40,35 @@ int main(int argc, char *argv[]){
break;
}
}
rules_first=i+1;
ABNFGrammarBuilder builder;
shared_ptr<Grammar> grammar=make_shared<Grammar>(file);
grammar->include(make_shared<CoreRules>());
grammar = builder.createFromAbnfFile(file,grammar);
ifstream ifs;
ifs.open(file);
if (!ifs.good()){
cerr<<"Cannot open "<< file<<endl;
return -1;
}
char marker[10] = {0};
ifs.read(marker,sizeof(marker)-1);
ifs.close();
shared_ptr<Grammar> grammar;
auto t_start = std::chrono::high_resolution_clock::now();
if (string(marker) == "#!belr"){
grammar = make_shared<Grammar>(file);
if (grammar->load(file) == -1){
cerr<<"Fail to load compiled grammar "<< file<<endl;
return -1;
}
}else{
ABNFGrammarBuilder builder;
grammar = make_shared<Grammar>(file);
grammar->include(make_shared<CoreRules>());
grammar = builder.createFromAbnfFile(file,grammar);
}
auto t_end = std::chrono::high_resolution_clock::now();
cout<<"Grammar loading completed in "<<std::chrono::duration<double, std::milli>(t_end-t_start).count()<<" milliseconds"<<endl;
if (message_file){
ifstream istr(message_file);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment