parser.c 427 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

33
#define IN_LIBXML
34 35
#include "libxml.h"

36
/*
 * XML_DIR_SEP: directory separator character, a backslash when WIN32 is
 * defined and __CYGWIN__ is not, a forward slash otherwise.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
43
#include <limits.h>
44
#include <string.h>
45
#include <stdarg.h>
46
#include <libxml/xmlmemory.h>
47 48
#include <libxml/threads.h>
#include <libxml/globals.h>
49 50 51 52 53 54 55 56 57
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
58 59 60
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
61 62 63 64
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
83 84 85
#ifdef HAVE_LZMA_H
#include <lzma.h>
#endif
86

87 88 89
#include "buf.h"
#include "enc.h"

90 91 92
/*
 * Forward declaration: xmlFatalErr() is defined further down in this file
 * but is already called by xmlParserEntityCheck() below.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

93 94 95 96
/*
 * Forward declaration of a file-local helper; being static, its definition
 * has to live elsewhere in this translation unit.
 */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                  const xmlChar *base, xmlParserCtxtPtr pctx);

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

/*
 * XML_PARSER_BIG_ENTITY: the size-based test of xmlParserEntityCheck()
 * accepts any size strictly below this value without further checks.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10

/*
 * xmlParserEntityCheck
 *
 * Function to check non-linear entity expansion behaviour
 * This is here to detect and stop exponential linear entity expansion
 * This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
static int
124
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125
                     xmlEntityPtr ent, size_t replacement)
126
{
127
    size_t consumed = 0;
128 129 130 131 132

    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
        return (0);
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
        return (1);
Daniel Veillard's avatar
Daniel Veillard committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155

    /*
     * This may look absurd but is needed to detect
     * entities problems
     */
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
	(ent->content != NULL) && (ent->checked == 0)) {
	unsigned long oldnbent = ctxt->nbentities;
	xmlChar *rep;

	ent->checked = 1;

	rep = xmlStringDecodeEntities(ctxt, ent->content,
				  XML_SUBSTITUTE_REF, 0, 0, 0);

	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
	if (rep != NULL) {
	    if (xmlStrchr(rep, '<'))
		ent->checked |= 1;
	    xmlFree(rep);
	    rep = NULL;
	}
    }
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
    if (replacement != 0) {
	if (replacement < XML_MAX_TEXT_LENGTH)
	    return(0);

        /*
	 * If the volume of entity copy reaches 10 times the
	 * amount of parsed data and over the large text threshold
	 * then that's very likely to be an abuse.
	 */
        if (ctxt->input != NULL) {
	    consumed = ctxt->input->consumed +
	               (ctxt->input->cur - ctxt->input->base);
	}
        consumed += ctxt->sizeentities;

        if (replacement < XML_PARSER_NON_LINEAR * consumed)
	    return(0);
    } else if (size != 0) {
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
        /*
         * Do the check based on the replacement size of the entity
         */
        if (size < XML_PARSER_BIG_ENTITY)
	    return(0);

        /*
         * A limit on the amount of text data reasonably used
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
            return (0);
    } else if (ent != NULL) {
        /*
         * use the number of parsed entities in the replacement
         */
196
        size = ent->checked / 2;
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214

        /*
         * The amount of data parsed counting entities size only once
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        /*
         * Check the density of entities for the amount of data
	 * knowing an entity reference will take at least 3 bytes
         */
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
            return (0);
    } else {
        /*
Daniel Veillard's avatar
Daniel Veillard committed
215
         * strange we got no data for checking
216
         */
Daniel Veillard's avatar
Daniel Veillard committed
217 218 219 220
	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
	    (ctxt->nbentities <= 10000))
	    return (0);
221 222 223 224 225
    }
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
    return (1);
}

226
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
235

236

237 238

/*
 * Miscellaneous parser constants.
 * NOTE(review): the buffer-size macros are presumably initial sizes of
 * scratch buffers used further down in the file; confirm at the use sites.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

243 244 245 246 247 248 249 250 251 252 253
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100

254 255 256 257
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry and is never modified, so both
 * the strings and the array slots are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};

264

265
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
266 267
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);
268

269
static xmlParserErrors
270 271
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
272
		      void *user_data, int depth, const xmlChar *URL,
273
		      const xmlChar *ID, xmlNodePtr *list);
274

275 276 277
static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
278
#ifdef LIBXML_LEGACY_ENABLED
279 280 281
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
282
#endif /* LIBXML_LEGACY_ENABLED */
283

284
static xmlParserErrors
285 286
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
287

288 289 290
static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

291 292
/************************************************************************
 *									*
 *		Some factorized error routines				*
 *									*
 ************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context
 * @prefix:  the attribute prefix, or NULL when the attribute has none
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. Otherwise the error is raised as
 * XML_ERR_ATTRIBUTE_REDEFINED with level XML_ERR_FATAL, the context is
 * flagged as not well formed and, unless recovery is enabled, SAX
 * callbacks are disabled.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;

    if (prefix == NULL)
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    else
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @extra:  extra information string
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
342
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
343 344
{
    const char *errmsg;
345
    char errstr[129] = "";
346

347 348 349
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
350 351
    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
352
            errmsg = "CharRef: invalid hexadecimal value";
353
            break;
354
        case XML_ERR_INVALID_DEC_CHARREF:
355
            errmsg = "CharRef: invalid decimal value";
356
            break;
357
        case XML_ERR_INVALID_CHARREF:
358
            errmsg = "CharRef: invalid value";
359
            break;
360
        case XML_ERR_INTERNAL_ERROR:
361 362
            errmsg = "internal error";
            break;
363
        case XML_ERR_PEREF_AT_EOF:
364
            errmsg = "PEReference at end of document";
365
            break;
366
        case XML_ERR_PEREF_IN_PROLOG:
367
            errmsg = "PEReference in prolog";
368
            break;
369
        case XML_ERR_PEREF_IN_EPILOG:
370
            errmsg = "PEReference in epilog";
371
            break;
372
        case XML_ERR_PEREF_NO_NAME:
373
            errmsg = "PEReference: no name";
374
            break;
375
        case XML_ERR_PEREF_SEMICOL_MISSING:
376
            errmsg = "PEReference: expecting ';'";
377
            break;
378
        case XML_ERR_ENTITY_LOOP:
379
            errmsg = "Detected an entity reference loop";
380
            break;
381
        case XML_ERR_ENTITY_NOT_STARTED:
382
            errmsg = "EntityValue: \" or ' expected";
383
            break;
384
        case XML_ERR_ENTITY_PE_INTERNAL:
385
            errmsg = "PEReferences forbidden in internal subset";
386
            break;
387
        case XML_ERR_ENTITY_NOT_FINISHED:
388
            errmsg = "EntityValue: \" or ' expected";
389
            break;
390
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
391
            errmsg = "AttValue: \" or ' expected";
392
            break;
393
        case XML_ERR_LT_IN_ATTRIBUTE:
394
            errmsg = "Unescaped '<' not allowed in attributes values";
395
            break;
396
        case XML_ERR_LITERAL_NOT_STARTED:
397
            errmsg = "SystemLiteral \" or ' expected";
398
            break;
399
        case XML_ERR_LITERAL_NOT_FINISHED:
400
            errmsg = "Unfinished System or Public ID \" or ' expected";
401
            break;
402
        case XML_ERR_MISPLACED_CDATA_END:
403
            errmsg = "Sequence ']]>' not allowed in content";
404
            break;
405
        case XML_ERR_URI_REQUIRED:
406
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
407
            break;
408
        case XML_ERR_PUBID_REQUIRED:
409
            errmsg = "PUBLIC, the Public Identifier is missing";
410
            break;
411
        case XML_ERR_HYPHEN_IN_COMMENT:
412
            errmsg = "Comment must not contain '--' (double-hyphen)";
413
            break;
414
        case XML_ERR_PI_NOT_STARTED:
415
            errmsg = "xmlParsePI : no target name";
416
            break;
417
        case XML_ERR_RESERVED_XML_NAME:
418
            errmsg = "Invalid PI name";
419
            break;
420
        case XML_ERR_NOTATION_NOT_STARTED:
421
            errmsg = "NOTATION: Name expected here";
422
            break;
423
        case XML_ERR_NOTATION_NOT_FINISHED:
424
            errmsg = "'>' required to close NOTATION declaration";
425
            break;
426
        case XML_ERR_VALUE_REQUIRED:
427
            errmsg = "Entity value required";
428
            break;
429
        case XML_ERR_URI_FRAGMENT:
430 431
            errmsg = "Fragment not allowed";
            break;
432
        case XML_ERR_ATTLIST_NOT_STARTED:
433
            errmsg = "'(' required to start ATTLIST enumeration";
434
            break;
435
        case XML_ERR_NMTOKEN_REQUIRED:
436
            errmsg = "NmToken expected in ATTLIST enumeration";
437
            break;
438
        case XML_ERR_ATTLIST_NOT_FINISHED:
439
            errmsg = "')' required to finish ATTLIST enumeration";
440
            break;
441
        case XML_ERR_MIXED_NOT_STARTED:
442
            errmsg = "MixedContentDecl : '|' or ')*' expected";
443
            break;
444
        case XML_ERR_PCDATA_REQUIRED:
445
            errmsg = "MixedContentDecl : '#PCDATA' expected";
446
            break;
447
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
448
            errmsg = "ContentDecl : Name or '(' expected";
449
            break;
450
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
451
            errmsg = "ContentDecl : ',' '|' or ')' expected";
452
            break;
453
        case XML_ERR_PEREF_IN_INT_SUBSET:
454
            errmsg =
455
                "PEReference: forbidden within markup decl in internal subset";
456
            break;
457
        case XML_ERR_GT_REQUIRED:
458
            errmsg = "expected '>'";
459
            break;
460
        case XML_ERR_CONDSEC_INVALID:
461
            errmsg = "XML conditional section '[' expected";
462
            break;
463
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
464
            errmsg = "Content error in the external subset";
465 466 467
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
468
                "conditional section INCLUDE or IGNORE keyword expected";
469
            break;
470
        case XML_ERR_CONDSEC_NOT_FINISHED:
471
            errmsg = "XML conditional section not closed";
472
            break;
473
        case XML_ERR_XMLDECL_NOT_STARTED:
474
            errmsg = "Text declaration '<?xml' required";
475
            break;
476
        case XML_ERR_XMLDECL_NOT_FINISHED:
477
            errmsg = "parsing XML declaration: '?>' expected";
478
            break;
479
        case XML_ERR_EXT_ENTITY_STANDALONE:
480
            errmsg = "external parsed entities cannot be standalone";
481
            break;
482
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
483
            errmsg = "EntityRef: expecting ';'";
484
            break;
485
        case XML_ERR_DOCTYPE_NOT_FINISHED:
486
            errmsg = "DOCTYPE improperly terminated";
487
            break;
488
        case XML_ERR_LTSLASH_REQUIRED:
489
            errmsg = "EndTag: '</' not found";
490
            break;
491
        case XML_ERR_EQUAL_REQUIRED:
492
            errmsg = "expected '='";
493
            break;
494
        case XML_ERR_STRING_NOT_CLOSED:
495
            errmsg = "String not closed expecting \" or '";
496
            break;
497
        case XML_ERR_STRING_NOT_STARTED:
498
            errmsg = "String not started expecting ' or \"";
499
            break;
500
        case XML_ERR_ENCODING_NAME:
501
            errmsg = "Invalid XML encoding name";
502
            break;
503
        case XML_ERR_STANDALONE_VALUE:
504
            errmsg = "standalone accepts only 'yes' or 'no'";
505
            break;
506
        case XML_ERR_DOCUMENT_EMPTY:
507
            errmsg = "Document is empty";
508
            break;
509
        case XML_ERR_DOCUMENT_END:
510
            errmsg = "Extra content at the end of the document";
511
            break;
512
        case XML_ERR_NOT_WELL_BALANCED:
513
            errmsg = "chunk is not well balanced";
514
            break;
515
        case XML_ERR_EXTRA_CONTENT:
516
            errmsg = "extra content at the end of well balanced chunk";
517
            break;
518
        case XML_ERR_VERSION_MISSING:
519
            errmsg = "Malformed declaration expecting version";
520
            break;
521
        case XML_ERR_NAME_TOO_LONG:
522
            errmsg = "Name too long use XML_PARSE_HUGE option";
523
            break;
524
#if 0
525
        case:
526
            errmsg = "";
527
            break;
528
#endif
529
        default:
530
            errmsg = "Unregistered error message";
531
    }
532 533 534 535
    if (info == NULL)
        snprintf(errstr, 128, "%s\n", errmsg);
    else
        snprintf(errstr, 128, "%s: %%s\n", errmsg);
536 537
    if (ctxt != NULL)
	ctxt->errNo = error;
538
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
539
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
540
                    info);
541 542 543 544 545
    if (ctxt != NULL) {
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0)
	    ctxt->disableSAX = 1;
    }
546 547
}

548 549 550 551 552 553 554 555 556
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
557 558
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
559
{
560 561 562
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
563 564
    if (ctxt != NULL)
	ctxt->errNo = error;
565
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
566
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
567 568 569 570 571
    if (ctxt != NULL) {
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0)
	    ctxt->disableSAX = 1;
    }
572 573
}

574 575 576 577 578 579 580 581 582 583 584 585 586 587
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as a printf-style format for @str1/@str2
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. With a context, the warning goes to the SAX
 * warning callback (and to the structured error callback when the SAX
 * handler is initialized with XML_SAX2_MAGIC); without one, no channel is
 * passed to __xmlRaiseError(). Parser state flags are left untouched.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    if (ctxt != NULL) {
        __xmlRaiseError(schannel,
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
                    ctxt->userData,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
                    (const char *) str1, (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
    } else {
        __xmlRaiseError(schannel, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
                    (const char *) str1, (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
    }
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as a printf-style format for @str1/@str2
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a validity error.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. With a context, the error is raised from
 * XML_FROM_DTD through the validation context's error callback and the
 * document is flagged as not valid (ctxt->valid = 0); well-formedness
 * flags are left untouched.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL) {
        ctxt->errNo = error;
        if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
            schannel = ctxt->sax->serror;
    }
    if (ctxt != NULL) {
        __xmlRaiseError(schannel,
                    ctxt->vctxt.error, ctxt->vctxt.userData,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
                    (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
        ctxt->valid = 0;
    } else {
        __xmlRaiseError(schannel, NULL, NULL,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
                    (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
    }
}

653 654 655 656 657 658 659 660 661 662 663
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as a printf-style format for @val
 * @val:  an integer value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. After raising the error the context is flagged as
 * not well formed and, unless recovery is enabled, SAX callbacks are
 * disabled.
 */
static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

681 682 683 684 685 686 687 688 689 690 691 692 693
/**
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as a printf-style format consuming
 *        @str1, @val and @str2 in that order
 * @str1:  a string info
 * @val:  an integer value
 * @str2:  a string info
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. After raising the error the context is flagged as
 * not well formed and, unless recovery is enabled, SAX callbacks are
 * disabled.
 */
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar *str1, int val,
                  const xmlChar *str2)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) str1, (const char *) str2,
                    NULL, val, 0, msg, str1, val, str2);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

713 714 715 716 717 718 719 720 721 722 723
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as a printf-style format for @val
 * @val:  a string value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. After raising the error the context is flagged as
 * not well formed and, unless recovery is enabled, SAX callbacks are
 * disabled.
 */
static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

742 743 744 745 746 747 748 749 750 751 752 753 754
/**
 * xmlErrMsgStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as a printf-style format for @val
 * @val:  a string value
 *
 * Handle a non fatal parser error.
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state. The error is raised with level XML_ERR_ERROR;
 * apart from recording the error number, the parser state flags are left
 * untouched.
 */
static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
}

766 767 768 769 770 771 772 773 774 775 776 777 778
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the message, used as a printf-style format for @info1..@info3
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Handle a namespace error. It is raised from XML_FROM_NAMESPACE with
 * level XML_ERR_ERROR, and the context is flagged as not namespace
 * well formed (ctxt->nsWellFormed = 0).
 *
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state.
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);
    if (ctxt != NULL)
        ctxt->nsWellFormed = 0;
}

795 796 797 798 799 800 801 802
/**
 * xmlNsWarn:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the message, used as a printf-style format for @info1..@info3
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Report a namespace warning. It is raised from XML_FROM_NAMESPACE with
 * level XML_ERR_WARNING and leaves the parser context unchanged.
 * Nothing is reported when @ctxt has SAX disabled and is already in the
 * XML_PARSER_EOF state.
 */
static void
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    if (ctxt != NULL) {
        /* a stopped parser stays silent */
        if ((ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF))
            return;
    }

    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
                    NULL, 0,
                    (const char *) info1,
                    (const char *) info2,
                    (const char *) info3,
                    0, 0,
                    msg, info1, info2, info3);
}

820 821
/************************************************************************
 *									*
 *		Library wide options					*
 *									*
 ************************************************************************/

/**
  * xmlHasFeature:
  * @feature: the feature to be examined
  *
  * Examines if the library has been compiled with a given feature.
  *
  * Returns a non-zero value if the feature exists, zero (0) if the
  * feature does not exist or an unknown feature is requested.
  */
int
xmlHasFeature(xmlFeature feature)
{
    switch (feature) {
840
	case XML_WITH_THREAD:
841 842 843 844 845
#ifdef LIBXML_THREAD_ENABLED
	    return(1);
#else
	    return(0);
#endif
846
        case XML_WITH_TREE:
847 848 849 850 851
#ifdef LIBXML_TREE_ENABLED
            return(1);
#else
            return(0);
#endif
852
        case XML_WITH_OUTPUT:
853 854 855 856 857
#ifdef LIBXML_OUTPUT_ENABLED
            return(1);
#else
            return(0);
#endif
858
        case XML_WITH_PUSH:
859 860 861 862 863
#ifdef LIBXML_PUSH_ENABLED
            return(1);
#else
            return(0);
#endif
864
        case XML_WITH_READER:
865 866 867 868 869
#ifdef LIBXML_READER_ENABLED
            return(1);
#else
            return(0);
#endif
870
        case XML_WITH_PATTERN:
871 872 873 874 875
#ifdef LIBXML_PATTERN_ENABLED
            return(1);
#else
            return(0);
#endif
876
        case XML_WITH_WRITER:
877 878 879 880 881
#ifdef LIBXML_WRITER_ENABLED
            return(1);
#else
            return(0);
#endif
882
        case XML_WITH_SAX1:
883 884 885 886 887
#ifdef LIBXML_SAX1_ENABLED
            return(1);
#else
            return(0);
#endif
888
        case XML_WITH_FTP:
889 890 891 892 893
#ifdef LIBXML_FTP_ENABLED
            return(1);
#else
            return(0);
#endif
894
        case XML_WITH_HTTP:
895 896 897 898 899
#ifdef LIBXML_HTTP_ENABLED
            return(1);
#else
            return(0);
#endif
900
        case XML_WITH_VALID:
901 902 903 904 905
#ifdef LIBXML_VALID_ENABLED
            return(1);
#else
            return(0);
#endif
906
        case XML_WITH_HTML:
907 908 909 910 911
#ifdef LIBXML_HTML_ENABLED
            return(1);
#else
            return(0);
#endif
912
        case XML_WITH_LEGACY:
913 914 915 916 917
#ifdef LIBXML_LEGACY_ENABLED
            return(1);
#else
            return(0);
#endif
918
        case XML_WITH_C14N:
919 920 921 922 923
#ifdef LIBXML_C14N_ENABLED
            return(1);
#else
            return(0);
#endif
924
        case XML_WITH_CATALOG:
925 926 927 928 929
#ifdef LIBXML_CATALOG_ENABLED
            return(1);
#else
            return(0);
#endif
930
        case XML_WITH_XPATH:
931 932 933 934 935
#ifdef LIBXML_XPATH_ENABLED
            return(1);
#else
            return(0);
#endif
936
        case XML_WITH_XPTR:
937 938 939 940 941
#ifdef LIBXML_XPTR_ENABLED
            return(1);
#else
            return(0);
#endif
942
        case XML_WITH_XINCLUDE:
943 944 945 946 947
#ifdef LIBXML_XINCLUDE_ENABLED
            return(1);
#else
            return(0);
#endif
948
        case XML_WITH_ICONV:
949 950 951 952 953
#ifdef LIBXML_ICONV_ENABLED
            return(1);
#else
            return(0);
#endif
954
        case XML_WITH_ISO8859X:
955 956 957 958 959
#ifdef LIBXML_ISO8859X_ENABLED
            return(1);
#else
            return(0);
#endif
960
        case XML_WITH_UNICODE:
961 962 963 964 965
#ifdef LIBXML_UNICODE_ENABLED
            return(1);
#else
            return(0);
#endif
966
        case XML_WITH_REGEXP:
967 968 969 970 971
#ifdef LIBXML_REGEXP_ENABLED
            return(1);
#else
            return(0);
#endif
972
        case XML_WITH_AUTOMATA:
973 974 975 976 977
#ifdef LIBXML_AUTOMATA_ENABLED
            return(1);
#else
            return(0);
#endif
978
        case XML_WITH_EXPR:
979 980 981 982 983
#ifdef LIBXML_EXPR_ENABLED
            return(1);
#else
            return(0);
#endif
984
        case XML_WITH_SCHEMAS:
985 986 987 988 989
#ifdef LIBXML_SCHEMAS_ENABLED
            return(1);
#else
            return(0);
#endif
990
        case XML_WITH_SCHEMATRON:
991 992 993 994 995
#ifdef LIBXML_SCHEMATRON_ENABLED
            return(1);
#else
            return(0);
#endif
996
        case XML_WITH_MODULES:
997 998 999 1000 1001
#ifdef LIBXML_MODULES_ENABLED
            return(1);
#else
            return(0);
#endif
1002
        case XML_WITH_DEBUG:
1003 1004 1005 1006 1007
#ifdef LIBXML_DEBUG_ENABLED
            return(1);
#else
            return(0);
#endif
1008
        case XML_WITH_DEBUG_MEM:
1009 1010 1011 1012 1013
#ifdef DEBUG_MEMORY_LOCATION
            return(1);
#else
            return(0);
#endif
1014
        case XML_WITH_DEBUG_RUN:
1015 1016 1017 1018
#ifdef LIBXML_DEBUG_RUNTIME
            return(1);
#else
            return(0);
1019
#endif
1020 1021 1022 1023 1024
        case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
            return(1);
#else
            return(0);
1025 1026 1027 1028 1029 1030
#endif
        case XML_WITH_LZMA:
#ifdef LIBXML_LZMA_ENABLED
            return(1);
#else
            return(0);
1031 1032 1033 1034 1035 1036
#endif
        case XML_WITH_ICU:
#ifdef LIBXML_ICU_ENABLED
            return(1);
#else
            return(0);
1037
#endif
1038 1039 1040 1041 1042 1043
        default:
	    break;
     }
     return(0);
}

1044 1045
/************************************************************************
 *									*
 *		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context
 *
 * Do the SAX2 detection and the SAX2 specific initialization: decide
 * whether ctxt->sax2 must be set, then store in the context the
 * dictionary-interned strings "xml", "xmlns" and XML_XML_NAMESPACE.
 * A failure to intern any of those strings is reported through
 * xmlErrMemory(). Does nothing when @ctxt is NULL.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL)
        return;

#ifdef LIBXML_SAX1_ENABLED
    /*
     * With SAX1 support compiled in, SAX2 mode is selected only for a
     * handler carrying the SAX2 magic value and providing at least one
     * of the namespace-aware element callbacks.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) {
        if ((ctxt->sax->startElementNs != NULL) ||
            (ctxt->sax->endElementNs != NULL))
            ctxt->sax2 = 1;
    }
#else
    /* Without SAX1 support SAX2 is the only possible mode. */
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

    /* Any missing string means one of the dictionary lookups failed. */
    if (!((ctxt->str_xml != NULL) && (ctxt->str_xmlns != NULL) &&
          (ctxt->str_xml_ns != NULL)))
        xmlErrMemory(ctxt, NULL);
}

typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
1081
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
1082 1083
};

1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * The "no conversion" case is detected by comparing the read and write
 * cursors once the copy is finished, so it can only be reported when the
 * normalization is done in place (@dst == @src) and nothing was dropped.
 * In every case @dst is NUL terminated on return.
 *
 * Returns a pointer to the start of the normalized value (@dst as passed
 *         by the caller) or NULL if no conversion is needed or if one of
 *         the arguments is NULL.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    /* remember the start of the output, dst is advanced while writing */
    xmlChar *start = dst;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* discard leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
	if (*src == 0x20) {
	    /* collapse a run of spaces, dropping it entirely at the end */
	    while (*src == 0x20) src++;
	    if (*src != 0)
		*dst++ = 0x20;
	} else {
	    *dst++ = *src++;
	}
    }
    *dst = 0;
    /* both cursors at the same place: in place and nothing was removed */
    if (dst == src)
       return(NULL);
    return(start);
}

/**
 * xmlAttrNormalizeSpace2:
 * @src: the source string
 *
 * Normalize the space in non CDATA attribute values, a slightly more complex
 * front end to avoid allocation problems when running on attribute values
 * coming from the input.
 *
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
 *         is needed.
 */
static const xmlChar *
1135
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175
{
    int i;
    int remove_head = 0;
    int need_realloc = 0;
    const xmlChar *cur;

    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
        return(NULL);
    i = *len;
    if (i <= 0)
        return(NULL);

    cur = src;
    while (*cur == 0x20) {
        cur++;
	remove_head++;
    }
    while (*cur != 0) {
	if (*cur == 0x20) {
	    cur++;
	    if ((*cur == 0x20) || (*cur == 0)) {
	        need_realloc = 1;
		break;
	    }
	} else
	    cur++;
    }
    if (need_realloc) {
        xmlChar *ret;

	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
	if (ret == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	xmlAttrNormalizeSpace(ret, ret);
	*len = (int) strlen((const char *)ret);
        return(ret);
    } else if (remove_head) {
        *len -= remove_head;
1176 1177
        memmove(src, src + remove_head, 1 + *len);
	return(src);
1178 1179 1180 1181
    }
    return(NULL);
}

1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200
/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element.
 *
 * Nothing is recorded when the (@fullname, @fullattr) pair is already
 * present in ctxt->attsSpecial. Otherwise the element and attribute names
 * are split into prefix and local name, every string is interned in
 * ctxt->dict, and a new 5 slot record is appended to the xmlDefAttrs
 * storage registered in ctxt->attsDefault under the element local name and
 * prefix. The storage is created or doubled in size when needed.
 *
 * All the strings are interned before the storage is touched, so that a
 * failure never leaves a half filled record. Any failure (allocation,
 * dictionary or hash table) is reported with xmlErrMemory().
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    int len;
    int valueLen;
    int slot;
    const xmlChar *name;
    const xmlChar *prefix;
    const xmlChar *attrName;
    const xmlChar *attrPrefix;
    const xmlChar *attrValue;

    /*
     * Allows to detect attribute redefinitions
     */
    if (ctxt->attsSpecial != NULL) {
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
            return;
    }

    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
        if (ctxt->attsDefault == NULL)
            goto mem_error;
    }

    /*
     * Split the element name into prefix:localname, the strings found
     * are within the DTD and then not associated to namespace names.
     * A NULL result from the dictionary is a failure: going on would
     * register the defaults under the wrong key.
     */
    name = xmlSplitQName3(fullname, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullname, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullname, len);
        if (prefix == NULL)
            goto mem_error;
    }
    if (name == NULL)
        goto mem_error;

    /*
     * Split the attribute name into prefix:localname, the strings found
     * are within the DTD and then not associated to namespace names.
     */
    attrName = xmlSplitQName3(fullattr, &len);
    if (attrName == NULL) {
        attrName = xmlDictLookup(ctxt->dict, fullattr, -1);
        attrPrefix = NULL;
    } else {
        attrName = xmlDictLookup(ctxt->dict, attrName, -1);
        attrPrefix = xmlDictLookup(ctxt->dict, fullattr, len);
        if (attrPrefix == NULL)
            goto mem_error;
    }
    if (attrName == NULL)
        goto mem_error;

    /*
     * Intern the value. A NULL result is an error only when a value was
     * actually provided.
     */
    valueLen = xmlStrlen(value);
    attrValue = xmlDictLookup(ctxt->dict, value, valueLen);
    if ((attrValue == NULL) && (value != NULL))
        goto mem_error;

    /*
     * make sure there is some storage
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
    if (defaults == NULL) {
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
                           (4 * 5) * sizeof(const xmlChar *));
        if (defaults == NULL)
            goto mem_error;
        defaults->nbAttrs = 0;
        defaults->maxAttrs = 4;
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            xmlFree(defaults);
            goto mem_error;
        }
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
        xmlDefAttrsPtr temp;

        /* 2 * maxAttrs * 5 below must not overflow an int */
        if (defaults->maxAttrs > INT_MAX / 10)
            goto mem_error;

        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
                       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
        if (temp == NULL)
            goto mem_error;
        defaults = temp;
        defaults->maxAttrs *= 2;
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            /*
             * The table may still reference the block as it was before
             * the reallocation: drop that entry, without deallocator,
             * before releasing the memory so no stale pointer remains.
             */
            xmlHashRemoveEntry2(ctxt->attsDefault, name, prefix, NULL);
            xmlFree(defaults);
            goto mem_error;
        }
    }

    /*
     * Append the record: local name, prefix, value, precomputed end of
     * the value, and the marker telling the declaration was external.
     */
    slot = 5 * defaults->nbAttrs;
    defaults->values[slot] = attrName;
    defaults->values[slot + 1] = attrPrefix;
    defaults->values[slot + 2] = attrValue;
    defaults->values[slot + 3] =
        (attrValue != NULL) ? attrValue + valueLen : NULL;
    if (ctxt->external)
        defaults->values[slot + 4] = BAD_CAST "external";
    else
        defaults->values[slot + 4] = NULL;
    defaults->nbAttrs++;

    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

1293 1294 1295 1296 1297 1298 1299
/**
 * xmlAddSpecialAttr:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @type:  the attribute type
 *
 * Register this attribute type in ctxt->attsSpecial, creating the table
 * on first use. The type is stored directly as the entry payload, and an
 * existing registration for the same (@fullname, @fullattr) pair is kept
 * unchanged. A failure to create the table or to add the entry is reported
 * with xmlErrMemory().
 *
 * NOTE(review): a @type of 0 would be stored as a NULL payload, which the
 * lookup cannot tell apart from a missing entry; callers are presumably
 * expected to pass non zero types only - confirm.
 */
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
		  const xmlChar *fullname,
		  const xmlChar *fullattr,
		  int type)
{
    if (ctxt->attsSpecial == NULL) {
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
	if (ctxt->attsSpecial == NULL)
	    goto mem_error;
    }

    /* first declaration wins, later ones are ignored */
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
        return;

    /* the entry is known to be absent, so a failure here is an error */
    if (xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
                         (void *) (long) type) < 0)
        goto mem_error;
    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336
/**
 * xmlCleanSpecialAttrCallback:
 * @payload:  the entry payload, the attribute type stored as a pointer
 * @data:  the user data, the XML parser context
 * @fullname:  the element fullname, first key of the entry
 * @fullattr:  the attribute fullname, second key of the entry
 * @unused:  the third key, not used
 *
 * Hash scanner callback removing one entry from the special attribute
 * table of the context when its type is CDATA.
 */
static void
xmlCleanSpecialAttrCallback(void *payload, void *data,
                            const xmlChar *fullname, const xmlChar *fullattr,
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
    xmlParserCtxtPtr ctxt;

    /* anything but CDATA stays in the table */
    if (((long) payload) != XML_ATTRIBUTE_CDATA)
        return;

    ctxt = (xmlParserCtxtPtr) data;
    xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
}

/**
 * xmlCleanSpecialAttr:
 * @ctxt:  an XML parser context
 *
 * Trim the list of attributes defined to remove all those of type
 * CDATA as they are not special. This call should be done when finishing
 * to parse the DTD and before starting to parse the document root.
 * The table itself is freed, and ctxt->attsSpecial reset to NULL, when
 * no entry is left after the cleanup.
 */
static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
{
    if (ctxt->attsSpecial != NULL) {
        /* drop every CDATA entry */
        xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);

        /* an empty table is useless, release it */
        if (xmlHashSize(ctxt->attsSpecial) == 0) {
            xmlHashFree(ctxt->attsSpecial, NULL);
            ctxt->attsSpecial = NULL;
        }
    }
}

1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380
/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
Daniel Veillard's avatar