Commit 260a68fd authored by Daniel Veillard's avatar Daniel Veillard

Release 0.2, 80% rewrite, nothing left intact ... Daniel

parent a65771c1
Wed Aug 12 23:12:58 EDT 1998 Daniel Veillard <Daniel.Veillard@w3.org>
* New release 0.2, removed the old xml_* files so that it's
coherent with the other CVS base (W3C), far better conformance
to standard, new namespaces, decent entities support, beginning
of a SAX-like interface. Nearly nothing left intact, even the
test examples ...
1998-07-30 Christopher Blizzard <blizzard@appliedtheory.com>
* .cvsignore: Add .deps dir
......
......@@ -5,14 +5,18 @@ noinst_PROGRAMS=tester
lib_LTLIBRARIES = libxml.la
libxml_la_SOURCES = \
xml_entities.c \
xml_parser.c \
xml_tree.c
SAX.c \
entities.c \
error.c \
parser.c \
tester.c \
tree.c
include_HEADERS = \
xml_entities.h \
xml_parser.h \
xml_tree.h
entities.h \
parser.h \
tree.h
DEPS = $(top_builddir)/libxml.la
LDADDS = $(top_builddir)/libxml.la @Z_LIBS@
......
# This is a makefile for win32 systems (VC 5.0).
# Christopher Blizzard
# http://odin.appliedtheory.com/
# Tool chain: Microsoft C compiler, linker and librarian.
CC = cl
CFLAGS = /c /GB /Gi /nologo /I. /DWIN32 /MT /Zi
LD = link
LDFLAGS = /DEBUG /NODEFAULTLIB:libc
AR = lib

# Default target: the static library only.
all: xml.lib

# Build the test driver.
test: tester.exe

# Objects archived into xml.lib.  error.obj provides xmlParserError and
# xmlParserWarning, which SAX.c installs in the default SAX handler, so it
# has to be part of the library for tester.exe to link.
SHARED_OBJS = entities.obj parser.obj tree.obj SAX.obj error.obj

xml.lib: $(SHARED_OBJS)
	$(AR) /out:xml.lib $(SHARED_OBJS)

# cl names its object file with /Fo (/out: belongs to link and lib).
tester.obj: tester.c
	$(CC) $(CFLAGS) /Fotester.obj tester.c

tester.exe: tester.obj xml.lib
	$(LD) $(LDFLAGS) /out:tester.exe tester.obj xml.lib

# Remove objects, binaries and the debugger/incremental-link side files.
clean:
	-del /f $(SHARED_OBJS) tester.obj
	-del /f tester.exe
	-del /f xml.lib
	-del /f *.pdb
	-del /f *.idb
	-del /f *.ilk
/*
* SAX.c : Default SAX handler to build a tree.
*/
#include <stdio.h>
#include <malloc.h>
#include "tree.h"
#include "parser.h"
#include "error.h"
/* #define DEBUG_SAX */
/*
 * getPublicId:
 * Locator callback for the public identifier of the current entity,
 * e.g. "-//SGMLSOURCE//DTD DEMO//EN".
 *
 * ctxt: the parser context (not consulted).
 *
 * Always returns NULL: no public identifier is reported.
 */
const CHAR *
getPublicId(xmlParserCtxtPtr ctxt)
{
    return NULL;
}
/*
 * getSystemId:
 * Locator callback for the system identifier, basically a URI or a
 * filename, e.g. http://www.sgmlsource.com/dtds/memo.dtd
 *
 * ctxt: the parser context; the filename of its current input is used.
 *
 * Returns ctxt->input->filename.  That field is declared as a plain
 * "const char *" while this callback returns "const CHAR *", so the
 * conversion is spelled out with a cast instead of relying on an implicit
 * conversion between character pointer types.
 */
const CHAR *getSystemId(xmlParserCtxtPtr ctxt) {
    return((const CHAR *) ctxt->input->filename);
}
/*
 * getLineNumber:
 * Locator callback giving the line of the current parsing point.
 *
 * ctxt: the parser context.
 *
 * Returns the line counter of the context's current input.
 */
int
getLineNumber(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr current = ctxt->input;

    return current->line;
}
/*
 * getColumnNumber:
 * Locator callback giving the column of the current parsing point.
 *
 * ctxt: the parser context.
 *
 * Returns the column counter of the context's current input.
 */
int
getColumnNumber(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr current = ctxt->input;

    return current->col;
}
/*
 * The default SAX Locator: bundles the four position callbacks defined
 * above (public ID, system ID, line number, column number).
 * NOTE(review): the initializer is positional, so the order must match the
 * field order of xmlSAXLocator, which is declared in another file - confirm
 * before reordering.
 */
xmlSAXLocator xmlDefaultSAXLocator = {
getPublicId, getSystemId, getLineNumber, getColumnNumber
};
/*
 * resolveEntity:
 * Special entity resolver.  Resolution is better left to the parser, which
 * has more context than the application layer, so this default does
 * nothing.
 *
 * ctxt:     the parser context (unused)
 * publicId: public identifier of the entity (only traced)
 * systemId: system identifier of the entity (only traced)
 *
 * Always returns NULL.  With DEBUG_SAX defined the call is traced on stderr.
 */
xmlParserInputPtr
resolveEntity(xmlParserCtxtPtr ctxt, const CHAR *publicId,
              const CHAR *systemId)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId);
#endif
    return NULL;
}
/*
 * notationDecl:
 * Called when a notation declaration has been parsed.
 * TODO Not handled currently: the declaration is dropped.
 *
 * ctxt:     the parser context (unused)
 * name:     name of the notation
 * publicId: public identifier of the notation
 * systemId: system identifier of the notation
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
notationDecl(xmlParserCtxtPtr ctxt, const CHAR *name,
             const CHAR *publicId, const CHAR *systemId)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId);
#endif
}
/*
 * unparsedEntityDecl:
 * Called when an unparsed entity declaration has been parsed.
 * TODO Create an Entity node; for now the declaration is dropped.
 *
 * ctxt:         the parser context (unused)
 * name:         name of the entity
 * publicId:     public identifier of the entity
 * systemId:     system identifier of the entity
 * notationName: name of the associated notation
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
unparsedEntityDecl(xmlParserCtxtPtr ctxt, const CHAR *name,
                   const CHAR *publicId, const CHAR *systemId,
                   const CHAR *notationName)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
            name, publicId, systemId, notationName);
#endif
}
/*
 * setDocumentLocator:
 * Receives the document locator at startup (in practice
 * xmlDefaultSAXLocator).  Everything the locator could report is already
 * available on the context, so the default handler ignores it.
 *
 * ctxt: the parser context (unused)
 * loc:  the locator (unused)
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
setDocumentLocator(xmlParserCtxtPtr ctxt, xmlSAXLocatorPtr loc)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.setDocumentLocator()\n");
#endif
}
/*
 * startDocument:
 * Called when the document starts being processed.
 *
 * ctxt: the parser context (unused)
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
startDocument(xmlParserCtxtPtr ctxt)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.startDocument()\n");
#endif
}
/*
 * endDocument:
 * Called when the end of the document has been detected.
 *
 * ctxt: the parser context (unused)
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
endDocument(xmlParserCtxtPtr ctxt)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.endDocument()\n");
#endif
}
/*
 * startElement:
 * Called when an opening tag has been processed.  Links the context's
 * current node under the entry found one slot below the top of the node
 * stack, which is treated as its parent.
 * TODO We currently have a small problem with the arguments ...
 *
 * ctxt: the parser context; nodeNr, nodeTab and node are read
 * name: the element name (only used by the DEBUG_SAX trace)
 *
 * Nothing is linked while the stack holds fewer than two entries, or when
 * the parent slot is NULL.
 */
void
startElement(xmlParserCtxtPtr ctxt, const CHAR *name)
{
    int depth;
    xmlNodePtr enclosing;

#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.startElement(%s)\n", name);
#endif
    depth = ctxt->nodeNr;
    if (depth >= 2) {
        enclosing = ctxt->nodeTab[depth - 2];
        if (enclosing != NULL) {
            xmlAddChild(enclosing, ctxt->node);
        }
    }
}
/*
 * endElement:
 * Called when the end of an element has been detected.
 *
 * ctxt: the parser context (unused)
 * name: the element name (only used by the DEBUG_SAX trace)
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
endElement(xmlParserCtxtPtr ctxt, const CHAR *name)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.endElement(%s)\n", name);
#endif
}
/*
 * characters:
 * Receives some characters from the parser and stores them under the
 * context's current node.
 * Question: how much at a time ???
 *
 * ctxt:  the parser context; ctxt->node receives the text
 * ch:    buffer holding the characters
 * start: index in ch of the first character to use
 * len:   number of characters to use
 *
 * Three cases, depending on the last child of the current node:
 *   - no child at all: the text is added as content of the node itself;
 *   - last child is a text node: the text is concatenated to it;
 *   - otherwise: a new text node is created and added as a child.
 */
void
characters(xmlParserCtxtPtr ctxt, const CHAR *ch, int start, int len)
{
    const CHAR *text;
    xmlNodePtr tail;

#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.characters(%.30s, %d, %d)\n", ch, start, len);
#endif
    text = ch + start;
    tail = xmlGetLastChild(ctxt->node);

    if (tail == NULL) {
        xmlNodeAddContentLen(ctxt->node, text, len);
        return;
    }
    if (xmlNodeIsText(tail)) {
        xmlTextConcat(tail, text, len);
        return;
    }
    tail = xmlNewTextLen(text, len);
    xmlAddChild(ctxt->node, tail);
}
/*
 * ignorableWhitespace:
 * Receives some ignorable whitespace from the parser; the default handler
 * discards it.
 * Question: how much at a time ???
 *
 * ctxt:  the parser context (unused)
 * ch:    buffer holding the whitespace
 * start: index in ch of the first character
 * len:   number of characters
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
ignorableWhitespace(xmlParserCtxtPtr ctxt, const CHAR *ch, int start, int len)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.ignorableWhitespace(%.30s, %d, %d)\n", ch, start, len);
#endif
}
/*
 * processingInstruction:
 * Called when a processing instruction has been parsed; the default
 * handler does not store it.
 *
 * ctxt:   the parser context (unused)
 * target: the target name of the instruction
 * data:   the data of the instruction
 *
 * With DEBUG_SAX defined the call is traced on stderr; otherwise a no-op.
 */
void
processingInstruction(xmlParserCtxtPtr ctxt, const CHAR *target,
                      const CHAR *data)
{
#if defined(DEBUG_SAX)
    fprintf(stderr, "SAX.processingInstruction(%s, %s)\n", target, data);
#endif
}
/*
 * The default SAX handler: the tree-building callbacks of this file plus
 * the error reporting routines xmlParserWarning / xmlParserError.
 * NOTE(review): the initializer is positional; it is assumed that
 * xmlSAXHandler declares its fields in the same order in which
 * xmlDefaultSAXHandlerInit() assigns them (the last three slots being
 * warning, error and fatalError) - confirm against the struct declaration.
 */
xmlSAXHandler xmlDefaultSAXHandler = {
resolveEntity,
notationDecl,
unparsedEntityDecl,
setDocumentLocator,
startDocument,
endDocument,
startElement,
endElement,
characters,
ignorableWhitespace,
processingInstruction,
xmlParserWarning,
xmlParserError, /* presumably the "error" slot */
xmlParserError, /* presumably the "fatalError" slot: same routine as error */
};
void xmlDefaultSAXHandlerInit(void) {
xmlDefaultSAXHandler.resolveEntity = resolveEntity;
xmlDefaultSAXHandler.notationDecl = notationDecl;
xmlDefaultSAXHandler.unparsedEntityDecl = unparsedEntityDecl;
xmlDefaultSAXHandler.setDocumentLocator = setDocumentLocator;
xmlDefaultSAXHandler.startDocument = startDocument;
xmlDefaultSAXHandler.endDocument = endDocument;
xmlDefaultSAXHandler.startElement = startElement;
xmlDefaultSAXHandler.endElement = endElement;
xmlDefaultSAXHandler.characters = characters;
xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
xmlDefaultSAXHandler.processingInstruction = processingInstruction;
xmlDefaultSAXHandler.warning = xmlParserWarning;
xmlDefaultSAXHandler.error = xmlParserError;
xmlDefaultSAXHandler.fatalError = xmlParserError;
}
TODO for the XML parser:
- Support for UTF-8 encoding
- progressive parsing. Currently the parser uses a single
string containing the full document. The good point is
that there is no context associated with the parser, the
full state is in the stack. The bad point is that such a
recursive design is hard to make progressive ...
- Better error handling, use a dedicated, overridable error
handling function.
- Keep track of line numbers for better error reporting.
- Support for UTF-8 and UTF-16 encoding (Urgent !!!).
- progressive parsing. The entity support is a first step toward
abstraction of an input stream. A large part of the context is still
located on the stack; moving to a state machine and putting everything
in the parsing context should provide an adequate solution.
- DOM support, instead of using a proprietary in memory
format for the document representation, the parser should
call a DOM API to actually build the resulting document.
......@@ -17,14 +13,17 @@
representation of the document. Even better using RPC's
the parser can actually build the document in another
program.
- finish the support for Entities.
- Support for Comments (bad, should be in ASAP, they are parsed
but not stored).
- Support for PI.
- Support for CDATA.
but not stored), should be configurable.
- Improve the support of entities on save (+SAX).
Done:
- C++ support : John Ehresman <jehresma@dsg.harvard.edu>
- Updated code to follow more recent specs, added compatibility flag
- Better error handling, use a dedicated, overridable error
handling function.
- Support for CDATA.
- Keep track of line numbers for better error reporting.
- Support for PI (SAX one).
$Id$
......@@ -5,7 +5,7 @@ DIE=0
(autoconf --version) < /dev/null > /dev/null 2>&1 || {
echo
echo "You must have autoconf installed to compile GLIB."
echo "You must have autoconf installed to compile gnome-xml."
echo "Download the appropriate package for your distribution,"
echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
DIE=1
......@@ -13,7 +13,7 @@ DIE=0
(libtool --version) < /dev/null > /dev/null 2>&1 || {
echo
echo "You must have libtool installed to compile GLIB."
echo "You must have libtool installed to compile gnome-xml."
echo "Get ftp://alpha.gnu.org/gnu/libtool-1.0h.tar.gz"
echo "(or a newer version if it is available)"
DIE=1
......@@ -21,7 +21,7 @@ DIE=0
(automake --version) < /dev/null > /dev/null 2>&1 || {
echo
echo "You must have automake installed to compile GLIB."
echo "You must have automake installed to compile gnome-xml."
echo "Get ftp://ftp.cygnus.com/pub/home/tromey/automake-1.2d.tar.gz"
echo "(or a newer version if it is available)"
DIE=1
......@@ -31,8 +31,8 @@ if test "$DIE" -eq 1; then
exit 1
fi
test -f xml_entities.h || {
echo "You must run this script in the top-level GLIB directory"
test -f entities.h || {
echo "You must run this script in the top-level gnome-xml directory"
exit 1
}
......
/* config.h. Generated automatically by configure. */
/* config.h.in. Generated automatically from configure.in by autoheader. */
/* Define if you have the strftime function. */
#define HAVE_STRFTIME 1
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if you have the snprintf function. */
#define HAVE_SNPRINTF 1
/* Define if you have the strdup function. */
#define HAVE_STRDUP 1
/* Define if you have the strerror function. */
#define HAVE_STRERROR 1
/* Define if you have the strndup function. */
#define HAVE_STRNDUP 1
/* Define if you have the <ctype.h> header file. */
#define HAVE_CTYPE_H 1
/* Define if you have the <dirent.h> header file. */
#define HAVE_DIRENT_H 1
/* Define if you have the <errno.h> header file. */
#define HAVE_ERRNO_H 1
/* Define if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if you have the <malloc.h> header file. */
#define HAVE_MALLOC_H 1
/* Define if you have the <ndir.h> header file. */
/* #undef HAVE_NDIR_H */
/* Define if you have the <stdarg.h> header file. */
#define HAVE_STDARG_H 1
/* Define if you have the <sys/dir.h> header file. */
/* #undef HAVE_SYS_DIR_H */
/* Define if you have the <sys/ndir.h> header file. */
/* #undef HAVE_SYS_NDIR_H */
/* Define if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if you have the <time.h> header file. */
#define HAVE_TIME_H 1
/* Define if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define if you have the <zlib.h> header file. */
#define HAVE_ZLIB_H 1
dnl Process this file with autoconf to produce a configure script.
AC_PREREQ(2.2)
AC_INIT(xml_entities.h)
AC_INIT(entities.h)
AM_CONFIG_HEADER(config.h)
AM_INIT_AUTOMAKE(libxml, 0.10)
AM_INIT_AUTOMAKE(libxml, 0.20)
dnl Checks for programs.
AC_PROG_CC
......
This diff is collapsed.
......@@ -8,21 +8,31 @@
#ifndef __XML_ENTITIES_H__
#define __XML_ENTITIES_H__
#include "xml_parser.h"
#include "parser.h"
#ifdef __cplusplus
extern "C" {
#endif
#define XML_INTERNAL_GENERAL_ENTITY 1
#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3
#define XML_INTERNAL_PARAMETER_ENTITY 4
#define XML_EXTERNAL_PARAMETER_ENTITY 5
/*
* An unit of storage for an entity, contains the string, the value
* and the linkind data needed for the linking in the hash table.
*/
typedef struct xmlEntity {
const CHAR *id; /* The entity name */
CHAR *value; /* The entity CHAR equivalent */
int type; /* The entity type */
int len; /* The lenght of the name */
const CHAR *name; /* Name of the entity */
const CHAR *ExternalID; /* External identifier for PUBLIC Entity */
const CHAR *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
CHAR *content; /* The entity content or ndata if unparsed */
} xmlEntity, *xmlEntityPtr;
/*
......@@ -42,14 +52,16 @@ typedef struct xmlEntitiesTable {
* External functions :
*/
extern void xmlAddDocEntity(xmlDocPtr doc, CHAR *value, const CHAR *id);
extern void xmlAddDtdEntity(xmlDtdPtr dtd, CHAR *value, const CHAR *id);
extern CHAR *xmlGetEntity(xmlDocPtr doc, const CHAR *id);
extern CHAR *xmlSubstituteEntities(xmlDocPtr doc, const CHAR *input);
extern void xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type,
const CHAR *ExternalID, const CHAR *SystemID, CHAR *content);
extern void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type,
const CHAR *ExternalID, const CHAR *SystemID, CHAR *content);
extern xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name);
extern xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name);
extern CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input);
extern CHAR *xmlDecodeEntities(xmlDocPtr doc, const CHAR *input, int len);
extern xmlEntitiesTablePtr xmlCreateEntitiesTable(void);
extern void xmlFreeEntitiesTable(xmlEntitiesTablePtr table);
extern void xmlDumpEntitiesTable(xmlEntitiesTablePtr table);
#ifdef __cplusplus
}
......
/*
* error.c: module displaying errors
*/
#include <stdio.h>
#include <stdarg.h>
#include "parser.h"
/*
 * xmlParserError:
 * Display and format an error message on stderr.
 *
 * ctxt: the parser context; ctxt->input supplies the filename, the line
 *       number, the start of the buffer (base) and the parsing point (cur)
 * msg:  printf-style format string, followed by its arguments
 *
 * Output: "<filename>:<line>: error: <message>" (or "line <n>: error: ..."
 * when the input has no filename), then the input line being parsed (at
 * most 79 characters, starting at most 60 characters before the parsing
 * point), then a '^' placed under the parsing point.
 */
void xmlParserError(xmlParserCtxtPtr ctxt, const char *msg, ...) {
    const CHAR *cur, *base, *start, *mark;
    va_list args;
    int n;

    if (ctxt->input->filename)
        fprintf(stderr, "%s:%d: ", ctxt->input->filename,
                ctxt->input->line);
    else
        fprintf(stderr, "line %d: ", ctxt->input->line);

    fprintf(stderr, "error: ");
    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    base = ctxt->input->base;

    /*
     * If the parsing point sits on an end of line, report the line that
     * just ended.  Never step before the start of the buffer.
     */
    mark = ctxt->input->cur;
    while ((mark > base) && ((*mark == '\n') || (*mark == '\r')))
        mark--;

    /* Walk back to the beginning of that line, 60 characters at most. */
    start = mark;
    n = 0;
    while ((n++ < 60) && (start > base) &&
           (*start != '\n') && (*start != '\r'))
        start--;
    if ((*start == '\n') || (*start == '\r'))
        start++;

    /* Print the line content, truncated to 79 characters. */
    n = 0;
    for (cur = start;
         (*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79);
         cur++, n++)
        fprintf(stderr, "%c", (unsigned char) *cur);
    fprintf(stderr, "\n");

    /* Pad up to the parsing point and mark it. */
    n = 0;
    for (cur = start; (cur < mark) && (n < 60); cur++, n++)
        fprintf(stderr, " ");
    fprintf(stderr,"^\n");
}
/*
 * xmlParserWarning:
 * Display and format a warning message on stderr.
 *
 * ctxt: the parser context; ctxt->input supplies the filename, the line
 *       number, the start of the buffer (base) and the parsing point (cur)
 * msg:  printf-style format string, followed by its arguments
 *
 * Output: "<filename>:<line>: warning: <message>" (or
 * "line <n>: warning: ..." when the input has no filename), then the input
 * line being parsed (at most 79 characters, starting at most 60 characters
 * before the parsing point), then a '^' placed under the parsing point.
 * The context display follows the same rules as xmlParserError().
 */
void xmlParserWarning(xmlParserCtxtPtr ctxt, const char *msg, ...) {
    const CHAR *cur, *base, *start, *mark;
    va_list args;
    int n;

    if (ctxt->input->filename)
        fprintf(stderr, "%s:%d: ", ctxt->input->filename,
                ctxt->input->line);
    else
        fprintf(stderr, "line %d: ", ctxt->input->line);

    fprintf(stderr, "warning: ");
    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    base = ctxt->input->base;

    /*
     * If the parsing point sits on an end of line, report the line that
     * just ended.  Never step before the start of the buffer.
     */
    mark = ctxt->input->cur;
    while ((mark > base) && ((*mark == '\n') || (*mark == '\r')))
        mark--;

    /* Walk back to the beginning of that line, 60 characters at most. */
    start = mark;
    n = 0;
    while ((n++ < 60) && (start > base) &&
           (*start != '\n') && (*start != '\r'))
        start--;
    /* Only skip forward when we actually stopped on a line terminator. */
    if ((*start == '\n') || (*start == '\r'))
        start++;

    /* Print the line content, truncated to 79 characters. */
    n = 0;
    for (cur = start;
         (*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79);
         cur++, n++)
        fprintf(stderr, "%c", (unsigned char) *cur);
    fprintf(stderr, "\n");

    /* Pad up to the parsing point and mark it. */
    n = 0;
    for (cur = start; (cur < mark) && (n < 60); cur++, n++)
        fprintf(stderr, " ");
    fprintf(stderr,"^\n");
}
/*
 * entities.h : interface for the XML entities handling
*
* See Copyright for the status of this software.
*
* $Id$
*/
#ifndef __XML_ENTITIES_H__
#define __XML_ENTITIES_H__
#include "parser.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * The kinds of entity.
 * NOTE(review): presumably these are the values stored in xmlEntity.type
 * and passed as the "type" argument of xmlAddDocEntity/xmlAddDtdEntity -
 * confirm in entities.c.
 */
#define XML_INTERNAL_GENERAL_ENTITY 1
#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3
#define XML_INTERNAL_PARAMETER_ENTITY 4
#define XML_EXTERNAL_PARAMETER_ENTITY 5
/*
 * A unit of storage for an entity: its type, its name (with the length of
 * the name), its external and system identifiers and its content.
 * NOTE(review): the previous wording mentioned linking data for a hash
 * table, but no link field is declared here - presumably a leftover from
 * an earlier layout.
 */
typedef struct xmlEntity {
int type; /* The entity type */
int len; /* The length of the name */
const CHAR *name; /* Name of the entity */
const CHAR *ExternalID; /* External identifier for PUBLIC Entity */
const CHAR *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
CHAR *content; /* The entity content or ndata if unparsed */
} xmlEntity, *xmlEntityPtr;
/*
 * All entities are stored in a table; there is one table per DTD
 * and one extra per document.
 */
/* NOTE(review): presumably the initial/minimum capacity of a table - the
 * code using this constant is not in this header; confirm in entities.c. */
#define XML_MIN_ENTITIES_TABLE 32
typedef struct xmlEntitiesTable {
int nb_entities; /* number of elements stored */
int max_entities; /* maximum number of elements */
xmlEntityPtr table; /* the table of entities */
} xmlEntitiesTable, *xmlEntitiesTablePtr;
/*
 * External functions :
 */
/*
 * Entity registration.  The arguments after doc carry the same names as
 * the fields of xmlEntity (name, type, ExternalID, SystemID, content).
 * NOTE(review): presumably the Doc variant fills the per-document table
 * and the Dtd variant the table of the document's DTD - confirm in
 * entities.c.
 */
extern void xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type,
const CHAR *ExternalID, const CHAR *SystemID, CHAR *content);
extern void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type,
const CHAR *ExternalID, const CHAR *SystemID, CHAR *content);
/* Entity lookup by name; both return an xmlEntityPtr. */
extern xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name);
extern xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name);
/* Takes an input string and returns a CHAR string.
 * NOTE(review): ownership of the returned buffer is not visible from this
 * header - confirm in entities.c who frees it. */
extern CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input);
/* Creation, destruction and dump of an entities table. */
extern xmlEntitiesTablePtr xmlCreateEntitiesTable(void);
extern void xmlFreeEntitiesTable(xmlEntitiesTablePtr table);
extern void xmlDumpEntitiesTable(xmlEntitiesTablePtr table);
#ifdef __cplusplus
}
#endif
# endif /* __XML_ENTITIES_H__ */
/*
* parser.h : constants and stuff related to the XML parser.
*
* See Copyright for the status of this software.
*
* $Id$
*/
#ifndef __XML_PARSER_H__
#define __XML_PARSER_H__
#include "tree.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
* Constants.
*/
#define XML_DEFAULT_VERSION "1.0"
/*
 * One input being parsed: where it comes from, the array of characters to
 * parse and the current position (pointer, line and column) in it.
 */
typedef struct xmlParserInput {
const char *filename; /* The file analyzed, if any */
const CHAR *base; /* Base of the array to parse */
const CHAR *cur; /* Current char being parsed */
int line; /* Current line */
int col; /* Current column */
} xmlParserInput, *xmlParserInputPtr;
/*
 * Source location record for one parsed node.
 */
typedef struct xmlParserNodeInfo {
const struct xmlNode* node; /* the node this record describes */
/* Positions and line numbers at which the text that created the node
 * begins and ends */
unsigned long begin_pos;
unsigned long begin_line;
unsigned long end_pos;
unsigned long end_line;
} xmlParserNodeInfo;
/*
 * A sequence of xmlParserNodeInfo records.
 * NOTE(review): presumably "maximum" is the allocated capacity of buffer
 * and "length" the number of records in use - confirm in parser.c.
 */
typedef struct xmlParserNodeInfoSeq {
unsigned long maximum;
unsigned long length;
xmlParserNodeInfo* buffer;
} xmlParserNodeInfoSeq, *xmlParserNodeInfoSeqPtr;
/*
 * The parser context: the SAX handler in use, the document being built,
 * the stack of input streams, the stack of nodes being parsed and the
 * optional per-node position records.  It is the first argument of every
 * SAX callback and of the error/warning routines.
 */
typedef struct xmlParserCtxt {
struct xmlSAXHandler *sax; /* The SAX handler */
xmlDocPtr doc; /* the document being built */
/* Input stream stack */
xmlParserInputPtr input; /* Current input stream */
int inputNr; /* Number of current input streams */
int inputMax; /* Max number of input streams */
xmlParserInputPtr *inputTab; /* stack of inputs */
/* Node analysis stack */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int record_info; /* Whether node info should be kept */
xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
} xmlParserCtxt, *xmlParserCtxtPtr;
/*