Commit 465a000b authored by Daniel Veillard

Fixed an uninitialized variable and extended the API to add the parser, serializer and some debugging

* valid.c: fixed an uninitialized variable
* xmlregexp.c include/libxml/xmlregexp.h: extended the API to
  add the parser, serializer and some debugging
* include/libxml/xmlversion.h.in: made the new support compiled
  by default if Schemas is included
* testRegexp.c: cleanup and integration of the first part of the
  new code with a special switch
* xmllint.c: show up Expr in --version if compiled in
* include/libxml/tree.h: moved the xmlBuffer definition up
Daniel
parent 630215ba
Mon Aug 22 13:49:18 CEST 2005 Daniel Veillard <daniel@veillard.com>
* valid.c: fixed an uninitialized variable
* xmlregexp.c include/libxml/xmlregexp.h: extended the API to
add the parser, serializer and some debugging
* include/libxml/xmlversion.h.in: made the new support compiled
by default if Schemas is included
* testRegexp.c: cleanup and integration of the first part of the
new code with a special switch
* xmllint.c: show up Expr in --version if compiled in
* include/libxml/tree.h: moved the xmlBuffer definition up
Mon Aug 22 12:11:10 CEST 2005 Kasimier Buchcik <libxml2-cvs@cazic.net>
* xmlschemas.c: Some preparation for the creation of a graph
......
......@@ -54,6 +54,33 @@ typedef xmlEntity *xmlEntityPtr;
*/
#define BASE_BUFFER_SIZE 4096
/**
* xmlBufferAllocationScheme:
*
* A buffer allocation scheme can be defined to either match exactly the
* need or double its allocated size each time it is found too small.
*/
typedef enum {
XML_BUFFER_ALLOC_DOUBLEIT,
XML_BUFFER_ALLOC_EXACT,
XML_BUFFER_ALLOC_IMMUTABLE
} xmlBufferAllocationScheme;
/**
* xmlBuffer:
*
* A buffer structure.
*/
typedef struct _xmlBuffer xmlBuffer;
typedef xmlBuffer *xmlBufferPtr;
struct _xmlBuffer {
xmlChar *content; /* The buffer content UTF8 */
unsigned int use; /* The buffer size used */
unsigned int size; /* The buffer size */
xmlBufferAllocationScheme alloc; /* The realloc method */
};
/**
* XML_XML_NAMESPACE:
*
......@@ -401,33 +428,6 @@ struct _xmlRef {
int lineno; /* The line number if attr is not available */
};
/**
* xmlBufferAllocationScheme:
*
* A buffer allocation scheme can be defined to either match exactly the
* need or double its allocated size each time it is found too small.
*/
typedef enum {
XML_BUFFER_ALLOC_DOUBLEIT,
XML_BUFFER_ALLOC_EXACT,
XML_BUFFER_ALLOC_IMMUTABLE
} xmlBufferAllocationScheme;
/**
* xmlBuffer:
*
* A buffer structure.
*/
typedef struct _xmlBuffer xmlBuffer;
typedef xmlBuffer *xmlBufferPtr;
struct _xmlBuffer {
xmlChar *content; /* The buffer content UTF8 */
unsigned int use; /* The buffer size used */
unsigned int size; /* The buffer size */
xmlBufferAllocationScheme alloc; /* The realloc method */
};
/**
* xmlNode:
*
......
......@@ -40,6 +40,7 @@ typedef xmlRegExecCtxt *xmlRegExecCtxtPtr;
}
#endif
#include <libxml/tree.h>
#include <libxml/dict.h>
#ifdef __cplusplus
extern "C" {
#endif
......@@ -115,10 +116,24 @@ XMLPUBFUN xmlExpCtxtPtr XMLCALL
xmlExpNewCtxt (int maxNodes,
xmlDictPtr dict);
XMLPUBFUN int XMLCALL
xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt);
XMLPUBFUN int XMLCALL
xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt);
/* Expressions are trees but the tree is opaque */
typedef struct _xmlExpNode xmlExpNode;
typedef xmlExpNode *xmlExpNodePtr;
typedef enum {
XML_EXP_EMPTY = 0,
XML_EXP_FORBID = 1,
XML_EXP_ATOM = 2,
XML_EXP_SEQ = 3,
XML_EXP_OR = 4,
XML_EXP_COUNT = 5
} xmlExpNodeType;
/*
* 2 core expressions shared by all for the empty language set
* and for the set with just the empty token
......@@ -131,30 +146,45 @@ XMLPUBVAR xmlExpNodePtr emptyExp;
*/
XMLPUBFUN void XMLCALL
xmlExpFree (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp);
xmlExpNodePtr expr);
XMLPUBFUN void XMLCALL
xmlExpRef (xmlExpNodePtr exp);
xmlExpRef (xmlExpNodePtr expr);
/*
* constructors can be either manual or from a string
*/
XMLPUBFUN xmlExpNodePtr XMLCALL
xmlExpParse (xmlExpCtxtPtr ctxt,
const char *expr);
/*
* The really interesting APIs
*/
XMLPUBFUN int XMLCALL
xmlExpIsNillable(xmlExpNodePtr exp);
xmlExpIsNillable(xmlExpNodePtr expr);
XMLPUBFUN int XMLCALL
xmlExpMaxToken (xmlExpNodePtr expr);
XMLPUBFUN int XMLCALL
xmlExpGetLanguage(xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr expr,
const xmlChar**list,
int len);
XMLPUBFUN int XMLCALL
xmlExpGetStart (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr expr,
const xmlChar**list,
int len);
XMLPUBFUN xmlExpNodePtr XMLCALL
xmlExpStringDerive(xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr expr,
const xmlChar *str,
int len);
XMLPUBFUN int XMLCALL
xmlExpSubsume (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr expr,
xmlExpNodePtr sub);
/* Serialize a compiled expression into buf (debugging aid). */
XMLPUBFUN void XMLCALL
		xmlExpDump	(xmlBufferPtr buf,
				 xmlExpNodePtr expr);
#endif /* LIBXML_EXPR_ENABLED */
#ifdef __cplusplus
}
......
......@@ -330,6 +330,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#define LIBXML_AUTOMATA_ENABLED
#endif
/**
* LIBXML_EXPR_ENABLED:
*
* Whether the formal expressions interfaces are compiled in
*/
#if @WITH_SCHEMAS@
#define LIBXML_EXPR_ENABLED
#endif
/**
* LIBXML_SCHEMAS_ENABLED:
*
......
......@@ -85,13 +85,121 @@ testRegexpFile(const char *filename) {
xmlRegFreeRegexp(comp);
}
#ifdef LIBXML_EXPR_ENABLED
/*
 * runFileTest:
 * @ctxt:  the expression context used for parsing and comparison
 * @filename:  path of the test script
 *
 * Run an xmlExp test script and print the results on stdout.
 * The script is read line by line, trailing blanks are stripped and:
 *   - lines starting with '#' are ignored
 *   - lines starting with "=>" define the current reference expression
 *   - any other non empty line, once a reference expression is set, is
 *     "<flag> <subexpression>" where flag '1' means the reference is
 *     expected to subsume the subexpression and '0' that it is not.
 * Node counts of the context are sampled around each step to report leaks.
 * Processing stops at the first expression which fails to parse.
 */
static void
runFileTest(xmlExpCtxtPtr ctxt, const char *filename) {
    xmlExpNodePtr expr = NULL, sub;
    FILE *input;
    char expression[5000];
    int len;

    input = fopen(filename, "r");
    if (input == NULL) {
        xmlGenericError(xmlGenericErrorContext,
                "Cannot open %s for reading\n", filename);
        return;
    }
    while (fgets(expression, 4500, input) != NULL) {
        /* strip trailing blanks, len ends as the index of the last char */
        len = (int) strlen(expression);
        len--;
        while ((len >= 0) &&
               ((expression[len] == '\n') || (expression[len] == '\t') ||
                (expression[len] == '\r') || (expression[len] == ' ')))
            len--;
        expression[len + 1] = 0;

        if (len < 0)
            continue;
        if (expression[0] == '#')
            continue;

        if ((expression[0] == '=') && (expression[1] == '>')) {
            char *str = &expression[2];

            if (expr != NULL) {
                xmlExpFree(ctxt, expr);
                if (xmlExpCtxtNbNodes(ctxt) != 0)
                    printf(" Parse/free of Expression leaked %d\n",
                           xmlExpCtxtNbNodes(ctxt));
                expr = NULL;
            }
            printf("Expression: %s\n", str) ;
            expr = xmlExpParse(ctxt, str);
            if (expr == NULL) {
                printf(" parsing Failed\n");
                break;
            }
        } else if (expr != NULL) {
            int expect = -1;
            int nodes1, nodes2;
            int ret;
            /*
             * The subexpression starts after the flag and its separator.
             * A line holding only the flag is a single character long, in
             * that case point at the terminator instead of reading stale
             * buffer content located after it.
             */
            const char *subexp = (len >= 1) ? &expression[2]
                                            : &expression[len + 1];

            if (expression[0] == '0')
                expect = 0;
            if (expression[0] == '1')
                expect = 1;
            printf("Subexp: %s", subexp) ;
            nodes1 = xmlExpCtxtNbNodes(ctxt);
            sub = xmlExpParse(ctxt, subexp);
            if (sub == NULL) {
                printf(" parsing Failed\n");
                break;
            }

            nodes2 = xmlExpCtxtNbNodes(ctxt);
            ret = xmlExpSubsume(ctxt, expr, sub);

            if ((expect == 1) && (ret == 1)) {
                printf(" => accept, Ok\n");
            } else if ((expect == 0) && (ret == 0)) {
                printf(" => reject, Ok\n");
            } else if ((expect == 1) && (ret == 0)) {
                printf(" => reject, Failed\n");
            } else if ((expect == 0) && (ret == 1)) {
                printf(" => accept, Failed\n");
            } else {
                printf(" => fail internally\n");
            }
            if (xmlExpCtxtNbNodes(ctxt) > nodes2) {
                printf(" Subsume leaked %d\n",
                       xmlExpCtxtNbNodes(ctxt) - nodes2);
                /* do not report the same nodes again as a parse leak */
                nodes1 += xmlExpCtxtNbNodes(ctxt) - nodes2;
            }
            xmlExpFree(ctxt, sub);
            if (xmlExpCtxtNbNodes(ctxt) > nodes1) {
                printf(" Parse/free leaked %d\n",
                       xmlExpCtxtNbNodes(ctxt) - nodes1);
            }
        }
    }
    if (expr != NULL) {
        xmlExpFree(ctxt, expr);
        if (xmlExpCtxtNbNodes(ctxt) != 0)
            printf(" Parse/free of Expression leaked %d\n",
                   xmlExpCtxtNbNodes(ctxt));
    }
    fclose(input);
}
#endif
static void usage(const char *name) {
fprintf(stderr, "Usage: %s\n", name);
fprintf(stderr, "Usage: %s [flags]\n", name);
fprintf(stderr, "Testing tool for libxml2 string and pattern regexps\n");
fprintf(stderr, " --debug: switch on debugging\n");
fprintf(stderr, " --repeat: loop on the operation\n");
#ifdef LIBXML_EXPR_ENABLED
fprintf(stderr, " --expr: test xmlExp and not xmlRegexp\n");
#endif
fprintf(stderr, " --input filename: use the given filename for regexp\n");
fprintf(stderr, " --input filename: use the given filename for exp\n");
}
int main(int argc, char **argv) {
xmlRegexpPtr comp = NULL;
#ifdef LIBXML_EXPR_ENABLED
xmlExpNodePtr expr = NULL;
int use_exp = 0;
xmlExpCtxtPtr ctxt = NULL;
#endif
const char *pattern = NULL;
char *filename = NULL;
int i;
......@@ -113,15 +221,32 @@ int main(int argc, char **argv) {
} else if ((!strcmp(argv[i], "-repeat")) ||
(!strcmp(argv[i], "--repeat"))) {
repeat++;
} else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "--input")))
#ifdef LIBXML_EXPR_ENABLED
} else if ((!strcmp(argv[i], "-expr")) ||
(!strcmp(argv[i], "--expr"))) {
use_exp++;
#endif
} else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "-f")) ||
(!strcmp(argv[i], "--input")))
filename = argv[++i];
else {
fprintf(stderr, "Unknown option %s\n", argv[i]);
usage(argv[0]);
}
}
#ifdef LIBXML_EXPR_ENABLED
if (use_exp)
ctxt = xmlExpNewCtxt(0, NULL);
#endif
if (filename != NULL) {
testRegexpFile(filename);
#ifdef LIBXML_EXPR_ENABLED
if (use_exp)
runFileTest(ctxt, filename);
else
#endif
testRegexpFile(filename);
} else {
for (i = 1; i < argc ; i++) {
if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) {
......@@ -143,6 +268,13 @@ int main(int argc, char **argv) {
if (comp != NULL)
xmlRegFreeRegexp(comp);
}
#ifdef LIBXML_EXPR_ENABLED
if (ctxt != NULL) {
printf("Ops: %d nodes, %d cons\n",
xmlExpCtxtNbNodes(ctxt), xmlExpCtxtNbCons(ctxt));
xmlExpFreeCtxt(ctxt);
}
#endif
xmlCleanupParser();
xmlMemoryDump();
return(0);
......
......@@ -2716,7 +2716,7 @@ xmlIsID(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) {
} else if (elem == NULL) {
return(0);
} else {
xmlAttributePtr attrDecl;
xmlAttributePtr attrDecl = NULL;
xmlChar felem[50], fattr[50];
xmlChar *fullelemname, *fullattrname;
......
......@@ -2752,6 +2752,9 @@ static void showVersion(const char *name) {
#ifdef LIBXML_REGEXP_ENABLED
fprintf(stderr, "Regexps ");
#endif
#ifdef LIBXML_EXPR_ENABLED
fprintf(stderr, "Expr ");
#endif
#ifdef LIBXML_AUTOMATA_ENABLED
fprintf(stderr, "Automata ");
#endif
......
......@@ -5491,15 +5491,6 @@ xmlAutomataIsDeterminist(xmlAutomataPtr am) {
* Formal Expression handling code *
* *
************************************************************************/
static void xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr exp, int glob);
#define PRINT_EXP(exp) { \
xmlBufferPtr xmlExpBuf; \
xmlExpBuf = xmlBufferCreate(); \
xmlExpDump(xmlExpBuf, exp, 0); \
xmlBufferWriteChar(xmlExpBuf, "\n"); \
xmlBufferDump(stdout, xmlExpBuf); \
xmlBufferFree(xmlExpBuf); \
}
/************************************************************************
* *
* Expression handling context *
......@@ -5515,7 +5506,6 @@ struct _xmlExpCtxt {
const char *expr;
const char *cur;
int nb_cons;
int nb_del;
int tabSize;
};
......@@ -5583,14 +5573,22 @@ xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
* Structure associated to an expression node *
* *
************************************************************************/
typedef enum {
XML_EXP_EMPTY = 0,
XML_EXP_FORBID = 1,
XML_EXP_ATOM = 2,
XML_EXP_SEQ = 3,
XML_EXP_OR = 4,
XML_EXP_COUNT = 5
} xmlExpNodeType;
#define MAX_NODES 10000
/* #define DEBUG_DERIV */
/*
* TODO:
* - Wildcards
* - public API for creation
*
* Started
* - regression testing
*
* Done
* - split into module and test tool
* - memleaks
*/
typedef enum {
XML_EXP_NILABLE = (1 << 0)
......@@ -5993,7 +5991,6 @@ xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
xmlExpFree(ctxt, exp->exp_left);
}
xmlFree(exp);
ctxt->nb_del++;
ctxt->nb_nodes--;
}
}
......@@ -6877,6 +6874,326 @@ xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
xmlExpFree(ctxt, tmp);
return(0);
}
/************************************************************************
* *
* Parsing expression *
* *
************************************************************************/
static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
#undef CUR
#define CUR (*ctxt->cur)
#undef NEXT
#define NEXT ctxt->cur++;
#undef IS_BLANK
#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
/*
 * xmlExpParseNumber:
 * @ctxt:  the expression context, ctxt->cur is the read position
 *
 * Skip blanks then read either '*' or a run of decimal digits and
 * advance ctxt->cur past what was consumed.
 * NOTE(review): the accumulation is not checked against int overflow.
 *
 * Returns the decimal value, or -1 for '*' and also -1 when the next
 * character is not a digit (nothing is consumed in that last case).
 */
static int
xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
    int value;

    SKIP_BLANKS
    if (*ctxt->cur == '*') {
        ctxt->cur++;
        return(-1);
    }
    if ((*ctxt->cur < '0') || (*ctxt->cur > '9'))
        return(-1);
    for (value = 0; (*ctxt->cur >= '0') && (*ctxt->cur <= '9'); ctxt->cur++)
        value = value * 10 + (*ctxt->cur - '0');
    return(value);
}
/*
 * xmlExpParseOr:
 * @ctxt:  the expression context, ctxt->cur is the read position
 *
 * Parse one operand: either a parenthesised sub-expression or a string
 * terminal, followed by an optional cardinality suffix which is one of
 * '?', '+', '*', "{min}" or "{min,max}". Despite its name this is the
 * innermost level of the parser, the '|' operator itself is handled
 * by xmlExpParseSeq().
 *
 * Returns the expression node or NULL in case of error
 */
static xmlExpNodePtr
xmlExpParseOr(xmlExpCtxtPtr ctxt) {
    const char *base;
    xmlExpNodePtr ret;
    const xmlChar *val;
    int min, max;

    SKIP_BLANKS
    base = ctxt->cur;
    if (*ctxt->cur == '(') {
        NEXT
        ret = xmlExpParseExpr(ctxt);
        SKIP_BLANKS
        if (*ctxt->cur != ')') {
            fprintf(stderr, "unbalanced '(' : %s\n", base);
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        NEXT
        SKIP_BLANKS
        /*
         * The parentheses are balanced but the content failed to parse:
         * report the failure instead of wrapping NULL in a quantifier.
         */
        if (ret == NULL)
            return(NULL);
    } else {
        /* a terminal extends up to the next blank or operator character */
        while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
               (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
               (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
            NEXT
        val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
        if (val == NULL)
            return(NULL);
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
        if (ret == NULL)
            return(NULL);
        SKIP_BLANKS
    }

    /* optional cardinality suffix, a max of -1 stands for unbounded */
    if (CUR == '{') {
        NEXT
        min = xmlExpParseNumber(ctxt);
        if (min < 0) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        SKIP_BLANKS
        if (CUR == ',') {
            NEXT
            max = xmlExpParseNumber(ctxt);
            SKIP_BLANKS
        } else {
            max = min;
        }
        if (CUR != '}') {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        NEXT
    } else if (CUR == '?') {
        NEXT
        min = 0;
        max = 1;
    } else if (CUR == '+') {
        NEXT
        min = 1;
        max = -1;
    } else if (CUR == '*') {
        NEXT
        min = 0;
        max = -1;
    } else {
        return(ret);
    }

    ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, min, max);
    SKIP_BLANKS
    return(ret);
}
/*
 * xmlExpParseSeq:
 * @ctxt:  the expression context, ctxt->cur is the read position
 *
 * Parse a chain of operands separated by the choice operator '|' and
 * combine them left to right into XML_EXP_OR nodes. Despite its name
 * this level handles choices, sequences are built by xmlExpParseExpr().
 *
 * Returns the expression node or NULL in case of error
 */
static xmlExpNodePtr
xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
    xmlExpNodePtr ret, right;

    ret = xmlExpParseOr(ctxt);
    SKIP_BLANKS
    /*
     * A failed left operand must not be combined: stop here rather than
     * parsing a right operand and handing NULL to the node constructor.
     */
    if (ret == NULL)
        return(NULL);
    while (CUR == '|') {
        NEXT
        right = xmlExpParseOr(ctxt);
        if (right == NULL) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
        if (ret == NULL)
            return(NULL);
    }
    return(ret);
}
/*
 * xmlExpParseExpr:
 * @ctxt:  the expression context, ctxt->cur is the read position
 *
 * Parse a full expression: a chain of choices separated by the sequence
 * operator ',' combined left to right into XML_EXP_SEQ nodes. This is
 * the outermost level of the parser and is also used for the content of
 * parenthesised sub-expressions.
 *
 * Returns the expression node or NULL in case of error
 */
static xmlExpNodePtr
xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
    xmlExpNodePtr ret, right;

    ret = xmlExpParseSeq(ctxt);
    SKIP_BLANKS
    /*
     * A failed left operand must not be combined: stop here rather than
     * parsing a right operand and handing NULL to the node constructor.
     */
    if (ret == NULL)
        return(NULL);
    while (CUR == ',') {
        NEXT
        right = xmlExpParseSeq(ctxt);
        if (right == NULL) {
            xmlExpFree(ctxt, ret);
            return(NULL);
        }
        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
        if (ret == NULL)
            return(NULL);
    }
    return(ret);
}
/**
 * xmlExpParse:
 * @ctxt: the expressions context
 * @expr: the 0 terminated string
 *
 * Minimal parser for regexps, it understands the following constructs
 *  - string terminals
 *  - choice operator |
 *  - sequence operator ,
 *  - subexpressions (...)
 *  - usual cardinality operators + * and ?
 *  - finite sequences  { min, max }
 *  - infinite sequences { min, * }
 * Only minimal checking is done, in particular there is no checking of
 * the string values. The whole string must be consumed, trailing blanks
 * excepted, otherwise the parse is considered failed.
 *
 * Returns a new expression or NULL in case of failure
 */
xmlExpNodePtr
xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
    xmlExpNodePtr ret;

    /* public entry point, reject missing arguments like the sibling APIs */
    if ((ctxt == NULL) || (expr == NULL))
        return(NULL);

    ctxt->expr = expr;
    ctxt->cur = expr;

    ret = xmlExpParseExpr(ctxt);
    SKIP_BLANKS
    if (*ctxt->cur != 0) {
        /* unparsed content left over, drop the partial result */
        xmlExpFree(ctxt, ret);
        return(NULL);
    }
    return(ret);
}
/*
 * xmlExpDumpInt:
 * @buf:  the buffer receiving the output
 * @expr:  the node to serialize, NULL is ignored
 * @glob:  if non zero the output is wrapped in parentheses
 *
 * Recursive worker of the serializer. Children which are themselves
 * sequences or choices are always written between parentheses, other
 * children are written bare.
 */
static void
xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
    xmlExpNodePtr child;

    if (expr == NULL)
        return;
    if (glob)
        xmlBufferWriteChar(buf, "(");

    switch (expr->type) {
        case XML_EXP_EMPTY:
            xmlBufferWriteChar(buf, "empty");
            break;
        case XML_EXP_FORBID:
            xmlBufferWriteChar(buf, "forbidden");
            break;
        case XML_EXP_ATOM:
            xmlBufferWriteCHAR(buf, expr->exp_str);
            break;
        case XML_EXP_SEQ:
        case XML_EXP_OR: {
            /* both binary operators differ only by their separator */
            const char *separator =
                (expr->type == XML_EXP_SEQ) ? " , " : " | ";

            child = expr->exp_left;
            xmlExpDumpInt(buf, child, (child->type == XML_EXP_SEQ) ||
                                      (child->type == XML_EXP_OR));
            xmlBufferWriteChar(buf, separator);
            child = expr->exp_right;
            xmlExpDumpInt(buf, child, (child->type == XML_EXP_SEQ) ||
                                      (child->type == XML_EXP_OR));
            break;
        }
        case XML_EXP_COUNT: {
            char rep[40];
            const char *suffix = rep;

            child = expr->exp_left;
            xmlExpDumpInt(buf, child, (child->type == XML_EXP_SEQ) ||
                                      (child->type == XML_EXP_OR));

            /* use the short operator form when one exists */
            if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
                suffix = "?";
            } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
                suffix = "*";
            } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
                suffix = "+";
            } else {
                if (expr->exp_max == expr->exp_min)
                    snprintf(rep, 39, "{%d}", expr->exp_min);
                else if (expr->exp_max < 0)
                    snprintf(rep, 39, "{%d,inf}", expr->exp_min);
                else
                    snprintf(rep, 39, "{%d,%d}", expr->exp_min,
                             expr->exp_max);
                rep[39] = 0;
            }
            xmlBufferWriteChar(buf, suffix);
            break;
        }
        default:
            fprintf(stderr, "Error in tree\n");
    }

    if (glob)
        xmlBufferWriteChar(buf, ")");
}
/**
 * xmlExpDump:
 * @buf:  a buffer to receive the output
 * @expr:  the compiled expression
 *
 * Serialize the expression as compiled to the buffer.
 * Nothing is written if either argument is NULL.
 */
void
xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
    if ((buf == NULL) || (expr == NULL))
        return;
    /* the top level expression is written without enclosing parentheses */
    xmlExpDumpInt(buf, expr, 0);
}
/**
 * xmlExpMaxToken:
 * @expr: a compiled expression
 *
 * Indicate the maximum number of inputs an expression can accept,
 * as recorded in the c_max value of the node.
 *
 * Returns the maximum length or -1 in case of error
 */
int
xmlExpMaxToken(xmlExpNodePtr expr) {
    return((expr == NULL) ? -1 : expr->c_max);
}
/**
* xmlExpCtxtNbNodes:
* @ctxt: an expression context
*
* Debugging facility, provides the number of allocated nodes at that point
*
* Returns the number of nodes in use or -1 in case of error
*/
int
xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {