blob: a2f9f270f9076742cc03f39530f99c7343426779 [file] [log] [blame]
rjw1f884582022-01-06 17:20:42 +08001/*
2 * Summary: the core parser module
3 * Description: Interfaces, constants and types related to the XML parser
4 *
5 * Copy: See Copyright for the status of this software.
6 *
7 * Author: Daniel Veillard
8 */
9
10#ifndef __XML_PARSER_H__
11#define __XML_PARSER_H__
12
13#include <stdarg.h>
14
15#include <libxml/xmlversion.h>
16#include <libxml/tree.h>
17#include <libxml/dict.h>
18#include <libxml/hash.h>
19#include <libxml/valid.h>
20#include <libxml/entities.h>
21#include <libxml/xmlerror.h>
22#include <libxml/xmlstring.h>
23
24#ifdef __cplusplus
25extern "C" {
26#endif
27
28/**
29 * XML_DEFAULT_VERSION:
30 *
31 * The default version of XML used: 1.0
32 */
33#define XML_DEFAULT_VERSION "1.0"
34
35/**
36 * xmlParserInput:
37 *
38 * An xmlParserInput is an input flow for the XML processor.
39 * Each entity parsed is associated with an xmlParserInput (except the
40 * few predefined ones). This is the case both for internal entities
41 * - in which case the flow is already completely in memory - or
42 * external entities - in which case we use the buf structure for
43 * progressive reading and I18N conversions to the internal UTF-8 format.
44 */
45
46/**
47 * xmlParserInputDeallocate:
48 * @str: the string to deallocate
49 *
50 * Callback for freeing some parser input allocations.
51 */
52typedef void (* xmlParserInputDeallocate)(xmlChar *str);
53
54struct _xmlParserInput {
55 /* Input buffer */
56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
57
58 const char *filename; /* The file analyzed, if any */
59 const char *directory; /* The directory/base of the file */
60 const xmlChar *base; /* Base of the array to parse */
61 const xmlChar *cur; /* Current char being parsed */
62 const xmlChar *end; /* End of the array to parse */
63 int length; /* Length, if known */
64 int line; /* Current line */
65 int col; /* Current column */
66 /*
67 * NOTE: consumed is only tested for equality in the parser code,
68 * so even if it overflows this should not cause trouble
69 * when parsing very large instances.
70 */
71 unsigned long consumed; /* How many xmlChars already consumed */
72 xmlParserInputDeallocate free; /* Function to deallocate the base */
73 const xmlChar *encoding; /* The encoding string for the entity */
74 const xmlChar *version; /* The version string for the entity */
75 int standalone; /* Was that entity marked standalone */
76 int id; /* A unique identifier for the entity */
77};
78
79/**
80 * xmlParserNodeInfo:
81 *
82 * The parser can be asked to collect node information, i.e. the
83 * place in the file where each node was detected.
84 * NOTE: This is off by default and not very well tested.
85 */
86typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
87typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
88
89struct _xmlParserNodeInfo {
90 const struct _xmlNode* node; /* the node this record describes */
91 /* Position and line number at which the text that created the node begins and ends */
92 unsigned long begin_pos;
93 unsigned long begin_line;
94 unsigned long end_pos;
95 unsigned long end_line;
96};
97
98typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
99typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
100struct _xmlParserNodeInfoSeq { /* a sequence of xmlParserNodeInfo records */
101 unsigned long maximum; /* presumably the allocated capacity of buffer, in records -- TODO confirm */
102 unsigned long length; /* presumably the number of records in use -- TODO confirm */
103 xmlParserNodeInfo* buffer; /* the xmlParserNodeInfo records */
104};
105
106/**
107 * xmlParserInputState:
108 *
109 * The parser now also works as a state-based parser.
110 * The recursive one uses the state info for entity processing.
111 */
112typedef enum {
113 XML_PARSER_EOF = -1, /* nothing is to be parsed */
114 XML_PARSER_START = 0, /* nothing has been parsed */
115 XML_PARSER_MISC, /* Misc* before internal subset */
116 XML_PARSER_PI, /* within a processing instruction */
117 XML_PARSER_DTD, /* within some DTD content */
118 XML_PARSER_PROLOG, /* Misc* after internal subset */
119 XML_PARSER_COMMENT, /* within a comment */
120 XML_PARSER_START_TAG, /* within a start tag */
121 XML_PARSER_CONTENT, /* within the content */
122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */
123 XML_PARSER_END_TAG, /* within a closing tag */
124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */
125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a declaration */
126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */
129 XML_PARSER_IGNORE, /* within an IGNORED section */
130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */
131} xmlParserInputState;
132
133/**
134 * XML_DETECT_IDS:
135 *
136 * Bit in the loadsubset context field that requests ID/REFs lookups.
137 * Use it to initialize xmlLoadExtDtdDefaultValue.
138 */
139#define XML_DETECT_IDS 2
140
141/**
142 * XML_COMPLETE_ATTRS:
143 *
144 * Bit in the loadsubset context field that requests completing the
145 * element attribute lists with the ones defaulted from the DTDs.
146 * Use it to initialize xmlLoadExtDtdDefaultValue.
147 */
148#define XML_COMPLETE_ATTRS 4
149
150/**
151 * XML_SKIP_IDS:
152 *
153 * Bit in the loadsubset context field that disables ID/REFs registration.
154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
155 */
156#define XML_SKIP_IDS 8
157
158/**
159 * xmlParserMode:
160 *
161 * A parser can operate in various modes; struct _xmlParserCtxt has a parseMode field of this type.
162 */
163typedef enum {
164 XML_PARSE_UNKNOWN = 0,
165 XML_PARSE_DOM = 1,
166 XML_PARSE_SAX = 2,
167 XML_PARSE_PUSH_DOM = 3,
168 XML_PARSE_PUSH_SAX = 4,
169 XML_PARSE_READER = 5
170} xmlParserMode;
171
172/**
173 * xmlParserCtxt:
174 *
175 * The parser context.
176 * NOTE This doesn't completely define the parser state, the (current ?)
177 * design of the parser uses recursive function calls since this allows
178 * an easy mapping from the production rules of the specification
179 * to the actual code. The drawback is that the actual function call
180 * also reflects the parser state. However most of the parsing routines
181 * take as the only argument the parser context pointer, so migrating
182 * to a state based parser for progressive parsing shouldn't be too hard.
183 */
184struct _xmlParserCtxt {
185 struct _xmlSAXHandler *sax; /* The SAX handler */
186 void *userData; /* For SAX interface only, used by DOM build */
187 xmlDocPtr myDoc; /* the document being built */
188 int wellFormed; /* is the document well formed */
189 int replaceEntities; /* shall we replace entities ? */
190 const xmlChar *version; /* the XML version string */
191 const xmlChar *encoding; /* the declared encoding, if any */
192 int standalone; /* standalone document */
193 int html; /* an HTML(1)/Docbook(2) document
194 * 3 is HTML after <head>
195 * 10 is HTML after <body>
196 */
197
198 /* Input stream stack */
199 xmlParserInputPtr input; /* Current input stream */
200 int inputNr; /* Number of current input streams */
201 int inputMax; /* Max number of input streams */
202 xmlParserInputPtr *inputTab; /* stack of inputs */
203
204 /* Node analysis stack only used for DOM building */
205 xmlNodePtr node; /* Current parsed Node */
206 int nodeNr; /* Depth of the parsing stack */
207 int nodeMax; /* Max depth of the parsing stack */
208 xmlNodePtr *nodeTab; /* array of nodes */
209
210 int record_info; /* Whether node info should be kept */
211 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
212
213 int errNo; /* error code */
214
215 int hasExternalSubset; /* references an external subset */
216 int hasPErefs; /* the internal subset has PE refs */
217 int external; /* are we parsing an external entity */
218
219 int valid; /* is the document valid */
220 int validate; /* shall we try to validate ? */
221 xmlValidCtxt vctxt; /* The validity context */
222
223 xmlParserInputState instate; /* current type of input */
224 int token; /* next char look-ahead */
225
226 char *directory; /* the data directory */
227
228 /* Node name stack */
229 const xmlChar *name; /* Name of the current parsed Node */
230 int nameNr; /* Depth of the parsing stack */
231 int nameMax; /* Max depth of the parsing stack */
232 const xmlChar * *nameTab; /* array of node names */
233
234 long nbChars; /* number of xmlChar processed */
235 long checkIndex; /* used by progressive parsing lookup */
236 int keepBlanks; /* ugly but ... */
237 int disableSAX; /* SAX callbacks are disabled */
238 int inSubset; /* Parsing is in int 1/ext 2 subset */
239 const xmlChar * intSubName; /* name of subset */
240 xmlChar * extSubURI; /* URI of external subset */
241 xmlChar * extSubSystem; /* SYSTEM ID of external subset */
242
243 /* xml:space values */
244 int * space; /* Should the parser preserve spaces */
245 int spaceNr; /* Depth of the parsing stack */
246 int spaceMax; /* Max depth of the parsing stack */
247 int * spaceTab; /* array of space infos */
248
249 int depth; /* to prevent entity substitution loops */
250 xmlParserInputPtr entity; /* used to check entities boundaries */
251 int charset; /* encoding of the in-memory content
252 actually an xmlCharEncoding */
253 int nodelen; /* Those two fields are there to */
254 int nodemem; /* speed up large node parsing */
255 int pedantic; /* signal pedantic warnings */
256 void *_private; /* For user data, libxml won't touch it */
257
258 int loadsubset; /* should the external subset be loaded */
259 int linenumbers; /* set line number in element content */
260 void *catalogs; /* document's own catalog */
261 int recovery; /* run in recovery mode */
262 int progressive; /* is this a progressive parsing */
263 xmlDictPtr dict; /* dictionary for the parser */
264 const xmlChar * *atts; /* array for the attributes callbacks */
265 int maxatts; /* the size of the array */
266 int docdict; /* use strings from dict to build tree */
267
268 /*
269 * pre-interned strings
270 */
271 const xmlChar *str_xml;
272 const xmlChar *str_xmlns;
273 const xmlChar *str_xml_ns;
274
275 /*
276 * Everything below is used only by the new SAX mode
277 */
278 int sax2; /* operating in the new SAX mode */
279 int nsNr; /* the number of inherited namespaces */
280 int nsMax; /* the size of the arrays */
281 const xmlChar * *nsTab; /* the array of prefix/namespace name */
282 int *attallocs; /* which attributes were allocated */
283 void * *pushTab; /* array of data for push */
284 xmlHashTablePtr attsDefault; /* defaulted attributes if any */
285 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
286 int nsWellFormed; /* is the document XML Namespace okay */
287 int options; /* Extra options */
288
289 /*
290 * Those fields are needed only for streaming parsing so far
291 */
292 int dictNames; /* Use dictionary names for the tree */
293 int freeElemsNr; /* number of freed element nodes */
294 xmlNodePtr freeElems; /* List of freed element nodes */
295 int freeAttrsNr; /* number of freed attributes nodes */
296 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
297
298 /*
299 * the complete error information for the last error.
300 */
301 xmlError lastError;
302 xmlParserMode parseMode; /* the parser mode */
303 unsigned long nbentities; /* number of entities references */
304 unsigned long sizeentities; /* size of parsed entities */
305
306 /* for use by HTML non-recursive parser */
307 xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */
308 int nodeInfoNr; /* Depth of the parsing stack */
309 int nodeInfoMax; /* Max depth of the parsing stack */
310 xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */
311};
312
313/**
314 * xmlSAXLocator:
315 *
316 * A SAX Locator: a table of four callbacks, each taking an opaque ctx pointer.
317 */
318struct _xmlSAXLocator {
319 const xmlChar *(*getPublicId)(void *ctx);
320 const xmlChar *(*getSystemId)(void *ctx);
321 int (*getLineNumber)(void *ctx);
322 int (*getColumnNumber)(void *ctx);
323};
324
325/**
326 * xmlSAXHandler:
327 *
328 * A SAX handler is a bunch of callbacks called by the parser when processing
329 * of the input generates data or structure information.
330 */
331
332/**
333 * resolveEntitySAXFunc:
334 * @ctx: the user data (XML parser context)
335 * @publicId: The public ID of the entity
336 * @systemId: The system ID of the entity
337 *
338 * Callback:
339 * The entity loader. To control the loading of external entities,
340 * the application can either:
341 * - override this resolveEntity() callback in the SAX block
342 * - or better use the xmlSetExternalEntityLoader() function to
343 * set up its own entity resolution routine
344 *
345 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
346 */
347typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
348 const xmlChar *publicId,
349 const xmlChar *systemId);
350/**
351 * internalSubsetSAXFunc:
352 * @ctx: the user data (XML parser context)
353 * @name: the root element name
354 * @ExternalID: the external ID
355 * @SystemID: the SYSTEM ID (e.g. filename or URL)
356 *
357 * Callback on internal subset declaration.
358 */
359typedef void (*internalSubsetSAXFunc) (void *ctx,
360 const xmlChar *name,
361 const xmlChar *ExternalID,
362 const xmlChar *SystemID);
363/**
364 * externalSubsetSAXFunc:
365 * @ctx: the user data (XML parser context)
366 * @name: the root element name
367 * @ExternalID: the external ID
368 * @SystemID: the SYSTEM ID (e.g. filename or URL)
369 *
370 * Callback on external subset declaration.
371 */
372typedef void (*externalSubsetSAXFunc) (void *ctx,
373 const xmlChar *name,
374 const xmlChar *ExternalID,
375 const xmlChar *SystemID);
376/**
377 * getEntitySAXFunc:
378 * @ctx: the user data (XML parser context)
379 * @name: The entity name
380 *
381 * Get an entity by name.
382 *
383 * Returns the xmlEntityPtr if found.
384 */
385typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
386 const xmlChar *name);
387/**
388 * getParameterEntitySAXFunc:
389 * @ctx: the user data (XML parser context)
390 * @name: The entity name
391 *
392 * Get a parameter entity by name.
393 *
394 * Returns the xmlEntityPtr if found.
395 */
396typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
397 const xmlChar *name);
398/**
399 * entityDeclSAXFunc:
400 * @ctx: the user data (XML parser context)
401 * @name: the entity name
402 * @type: the entity type
403 * @publicId: The public ID of the entity
404 * @systemId: The system ID of the entity
405 * @content: the entity value (without processing).
406 *
407 * An entity definition has been parsed.
408 */
409typedef void (*entityDeclSAXFunc) (void *ctx,
410 const xmlChar *name,
411 int type,
412 const xmlChar *publicId,
413 const xmlChar *systemId,
414 xmlChar *content);
415/**
416 * notationDeclSAXFunc:
417 * @ctx: the user data (XML parser context)
418 * @name: The name of the notation
419 * @publicId: The public ID of the entity
420 * @systemId: The system ID of the entity
421 *
422 * What to do when a notation declaration has been parsed.
423 */
424typedef void (*notationDeclSAXFunc)(void *ctx,
425 const xmlChar *name,
426 const xmlChar *publicId,
427 const xmlChar *systemId);
428/**
429 * attributeDeclSAXFunc:
430 * @ctx: the user data (XML parser context)
431 * @elem: the name of the element
432 * @fullname: the attribute name
433 * @type: the attribute type
434 * @def: the type of default value
435 * @defaultValue: the attribute default value
436 * @tree: the tree of enumerated value set
437 *
438 * An attribute definition has been parsed.
439 */
440typedef void (*attributeDeclSAXFunc)(void *ctx,
441 const xmlChar *elem,
442 const xmlChar *fullname,
443 int type,
444 int def,
445 const xmlChar *defaultValue,
446 xmlEnumerationPtr tree);
447/**
448 * elementDeclSAXFunc:
449 * @ctx: the user data (XML parser context)
450 * @name: the element name
451 * @type: the element type
452 * @content: the element value tree
453 *
454 * An element definition has been parsed.
455 */
456typedef void (*elementDeclSAXFunc)(void *ctx,
457 const xmlChar *name,
458 int type,
459 xmlElementContentPtr content);
460/**
461 * unparsedEntityDeclSAXFunc:
462 * @ctx: the user data (XML parser context)
463 * @name: The name of the entity
464 * @publicId: The public ID of the entity
465 * @systemId: The system ID of the entity
466 * @notationName: the name of the notation
467 *
468 * What to do when an unparsed entity declaration is parsed.
469 */
470typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
471 const xmlChar *name,
472 const xmlChar *publicId,
473 const xmlChar *systemId,
474 const xmlChar *notationName);
475/**
476 * setDocumentLocatorSAXFunc:
477 * @ctx: the user data (XML parser context)
478 * @loc: A SAX Locator
479 *
480 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
481 * Everything is available on the context, so this is useless in our case.
482 */
483typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
484 xmlSAXLocatorPtr loc);
485/**
486 * startDocumentSAXFunc:
487 * @ctx: the user data (XML parser context)
488 *
489 * Called when the document starts being processed.
490 */
491typedef void (*startDocumentSAXFunc) (void *ctx);
492/**
493 * endDocumentSAXFunc:
494 * @ctx: the user data (XML parser context)
495 *
496 * Called when the document end has been detected.
497 */
498typedef void (*endDocumentSAXFunc) (void *ctx);
499/**
500 * startElementSAXFunc:
501 * @ctx: the user data (XML parser context)
502 * @name: The element name, including namespace prefix
503 * @atts: An array of name/value attributes pairs, NULL terminated
504 *
505 * Called when an opening tag has been processed.
506 */
507typedef void (*startElementSAXFunc) (void *ctx,
508 const xmlChar *name,
509 const xmlChar **atts);
510/**
511 * endElementSAXFunc:
512 * @ctx: the user data (XML parser context)
513 * @name: The element name
514 *
515 * Called when the end of an element has been detected.
516 */
517typedef void (*endElementSAXFunc) (void *ctx,
518 const xmlChar *name);
519/**
520 * attributeSAXFunc:
521 * @ctx: the user data (XML parser context)
522 * @name: The attribute name, including namespace prefix
523 * @value: The attribute value
524 *
525 * Handle an attribute that has been read by the parser.
526 * The default handling is to convert the attribute into a
527 * DOM subtree and paste it into a new xmlAttr element added to
528 * the element.
529 */
530typedef void (*attributeSAXFunc) (void *ctx,
531 const xmlChar *name,
532 const xmlChar *value);
533/**
534 * referenceSAXFunc:
535 * @ctx: the user data (XML parser context)
536 * @name: The entity name
537 *
538 * Called when an entity reference is detected.
539 */
540typedef void (*referenceSAXFunc) (void *ctx,
541 const xmlChar *name);
542/**
543 * charactersSAXFunc:
544 * @ctx: the user data (XML parser context)
545 * @ch: an xmlChar string
546 * @len: the number of xmlChar
547 *
548 * Receiving some chars from the parser.
549 */
550typedef void (*charactersSAXFunc) (void *ctx,
551 const xmlChar *ch,
552 int len);
553/**
554 * ignorableWhitespaceSAXFunc:
555 * @ctx: the user data (XML parser context)
556 * @ch: an xmlChar string
557 * @len: the number of xmlChar
558 *
559 * Receiving some ignorable whitespace from the parser.
560 * UNUSED: by default the DOM building will use characters.
561 */
562typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
563 const xmlChar *ch,
564 int len);
565/**
566 * processingInstructionSAXFunc:
567 * @ctx: the user data (XML parser context)
568 * @target: the target name
569 * @data: the PI data
570 *
571 * A processing instruction has been parsed.
572 */
573typedef void (*processingInstructionSAXFunc) (void *ctx,
574 const xmlChar *target,
575 const xmlChar *data);
576/**
577 * commentSAXFunc:
578 * @ctx: the user data (XML parser context)
579 * @value: the comment content
580 *
581 * A comment has been parsed.
582 */
583typedef void (*commentSAXFunc) (void *ctx,
584 const xmlChar *value);
585/**
586 * cdataBlockSAXFunc:
587 * @ctx: the user data (XML parser context)
588 * @value: The pcdata content
589 * @len: the block length
590 *
591 * Called when a pcdata block has been parsed.
592 */
593typedef void (*cdataBlockSAXFunc) (
594 void *ctx,
595 const xmlChar *value,
596 int len);
597/**
598 * warningSAXFunc:
599 * @ctx: an XML parser context
600 * @msg: the message to display/transmit
601 * @...: extra parameters for the message display
602 *
603 * Callback to format and display a warning message.
604 */
605typedef void (XMLCDECL *warningSAXFunc) (void *ctx,
606 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
607/**
608 * errorSAXFunc:
609 * @ctx: an XML parser context
610 * @msg: the message to display/transmit
611 * @...: extra parameters for the message display
612 *
613 * Callback to format and display an error message.
614 */
615typedef void (XMLCDECL *errorSAXFunc) (void *ctx,
616 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
617/**
618 * fatalErrorSAXFunc:
619 * @ctx: an XML parser context
620 * @msg: the message to display/transmit
621 * @...: extra parameters for the message display
622 *
623 * Callback to format and display a fatal error message.
624 * Note: so far fatalError() SAX callbacks are not used; error()
625 * gets all the callbacks for errors.
626 */
627typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx,
628 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
629/**
630 * isStandaloneSAXFunc:
631 * @ctx: the user data (XML parser context)
632 *
633 * Is this document tagged standalone?
634 *
635 * Returns 1 if true
636 */
637typedef int (*isStandaloneSAXFunc) (void *ctx);
638/**
639 * hasInternalSubsetSAXFunc:
640 * @ctx: the user data (XML parser context)
641 *
642 * Does this document have an internal subset?
643 *
644 * Returns 1 if true
645 */
646typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
647
648/**
649 * hasExternalSubsetSAXFunc:
650 * @ctx: the user data (XML parser context)
651 *
652 * Does this document have an external subset?
653 *
654 * Returns 1 if true
655 */
656typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
657
658/************************************************************************
659 * *
660 * The SAX version 2 API extensions *
661 * *
662 ************************************************************************/
663/**
664 * XML_SAX2_MAGIC:
665 *
666 * Special constant found in the initialized field of SAX2 handler blocks
667 */
668#define XML_SAX2_MAGIC 0xDEEDBEAF
669
670/**
671 * startElementNsSAX2Func:
672 * @ctx: the user data (XML parser context)
673 * @localname: the local name of the element
674 * @prefix: the element namespace prefix if available
675 * @URI: the element namespace name if available
676 * @nb_namespaces: number of namespace definitions on that node
677 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions
678 * @nb_attributes: the number of attributes on that node
679 * @nb_defaulted: the number of defaulted attributes. The defaulted
680 * ones are at the end of the array
681 * @attributes: pointer to the array of (localname/prefix/URI/value/end)
682 * attribute values.
683 *
684 * SAX2 callback when an element start has been detected by the parser.
685 * It provides the namespace information for the element, as well as
686 * the new namespace declarations on the element.
687 */
688
689typedef void (*startElementNsSAX2Func) (void *ctx,
690 const xmlChar *localname,
691 const xmlChar *prefix,
692 const xmlChar *URI,
693 int nb_namespaces,
694 const xmlChar **namespaces,
695 int nb_attributes,
696 int nb_defaulted,
697 const xmlChar **attributes);
698
699/**
700 * endElementNsSAX2Func:
701 * @ctx: the user data (XML parser context)
702 * @localname: the local name of the element
703 * @prefix: the element namespace prefix if available
704 * @URI: the element namespace name if available
705 *
706 * SAX2 callback when an element end has been detected by the parser.
707 * It provides the namespace information for the element.
708 */
709
710typedef void (*endElementNsSAX2Func) (void *ctx,
711 const xmlChar *localname,
712 const xmlChar *prefix,
713 const xmlChar *URI);
714
715
716struct _xmlSAXHandler {
717 internalSubsetSAXFunc internalSubset;
718 isStandaloneSAXFunc isStandalone;
719 hasInternalSubsetSAXFunc hasInternalSubset;
720 hasExternalSubsetSAXFunc hasExternalSubset;
721 resolveEntitySAXFunc resolveEntity;
722 getEntitySAXFunc getEntity;
723 entityDeclSAXFunc entityDecl;
724 notationDeclSAXFunc notationDecl;
725 attributeDeclSAXFunc attributeDecl;
726 elementDeclSAXFunc elementDecl;
727 unparsedEntityDeclSAXFunc unparsedEntityDecl;
728 setDocumentLocatorSAXFunc setDocumentLocator;
729 startDocumentSAXFunc startDocument;
730 endDocumentSAXFunc endDocument;
731 startElementSAXFunc startElement;
732 endElementSAXFunc endElement;
733 referenceSAXFunc reference;
734 charactersSAXFunc characters;
735 ignorableWhitespaceSAXFunc ignorableWhitespace;
736 processingInstructionSAXFunc processingInstruction;
737 commentSAXFunc comment;
738 warningSAXFunc warning;
739 errorSAXFunc error;
740 fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
741 getParameterEntitySAXFunc getParameterEntity;
742 cdataBlockSAXFunc cdataBlock;
743 externalSubsetSAXFunc externalSubset;
744 unsigned int initialized; /* holds XML_SAX2_MAGIC in SAX2 blocks */
745 /* The following fields are extensions available only on version 2 */
746 void *_private;
747 startElementNsSAX2Func startElementNs;
748 endElementNsSAX2Func endElementNs;
749 xmlStructuredErrorFunc serror;
750};
751
752/*
753 * SAX Version 1: the fields of struct _xmlSAXHandler up to initialized, without the version 2 extensions
754 */
755typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
756typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
757struct _xmlSAXHandlerV1 {
758 internalSubsetSAXFunc internalSubset;
759 isStandaloneSAXFunc isStandalone;
760 hasInternalSubsetSAXFunc hasInternalSubset;
761 hasExternalSubsetSAXFunc hasExternalSubset;
762 resolveEntitySAXFunc resolveEntity;
763 getEntitySAXFunc getEntity;
764 entityDeclSAXFunc entityDecl;
765 notationDeclSAXFunc notationDecl;
766 attributeDeclSAXFunc attributeDecl;
767 elementDeclSAXFunc elementDecl;
768 unparsedEntityDeclSAXFunc unparsedEntityDecl;
769 setDocumentLocatorSAXFunc setDocumentLocator;
770 startDocumentSAXFunc startDocument;
771 endDocumentSAXFunc endDocument;
772 startElementSAXFunc startElement;
773 endElementSAXFunc endElement;
774 referenceSAXFunc reference;
775 charactersSAXFunc characters;
776 ignorableWhitespaceSAXFunc ignorableWhitespace;
777 processingInstructionSAXFunc processingInstruction;
778 commentSAXFunc comment;
779 warningSAXFunc warning;
780 errorSAXFunc error;
781 fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
782 getParameterEntitySAXFunc getParameterEntity;
783 cdataBlockSAXFunc cdataBlock;
784 externalSubsetSAXFunc externalSubset;
785 unsigned int initialized;
786};
787
788
789/**
790 * xmlExternalEntityLoader:
791 * @URL: The System ID of the resource requested
792 * @ID: The Public ID of the resource requested
793 * @context: the XML parser context
794 *
795 * External entity loaders types.
796 *
797 * Returns the entity input parser.
798 */
799typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
800 const char *ID,
801 xmlParserCtxtPtr context);
802
803#ifdef __cplusplus
804}
805#endif
806
807#include <libxml/encoding.h>
808#include <libxml/xmlIO.h>
809#include <libxml/globals.h>
810
811#ifdef __cplusplus
812extern "C" {
813#endif
814
815
816/*
817 * Init/Cleanup
818 */
819XMLPUBFUN void XMLCALL
820 xmlInitParser (void);
821XMLPUBFUN void XMLCALL
822 xmlCleanupParser (void);
823
824/*
825 * Input functions
826 */
827XMLPUBFUN int XMLCALL
828 xmlParserInputRead (xmlParserInputPtr in,
829 int len);
830XMLPUBFUN int XMLCALL
831 xmlParserInputGrow (xmlParserInputPtr in,
832 int len);
833
834/*
835 * Basic parsing Interfaces
836 */
837#ifdef LIBXML_SAX1_ENABLED
838XMLPUBFUN xmlDocPtr XMLCALL
839 xmlParseDoc (const xmlChar *cur);
840XMLPUBFUN xmlDocPtr XMLCALL
841 xmlParseFile (const char *filename);
842XMLPUBFUN xmlDocPtr XMLCALL
843 xmlParseMemory (const char *buffer,
844 int size);
845#endif /* LIBXML_SAX1_ENABLED */
846XMLPUBFUN int XMLCALL
847 xmlSubstituteEntitiesDefault(int val);
848XMLPUBFUN int XMLCALL
849 xmlKeepBlanksDefault (int val);
850XMLPUBFUN void XMLCALL
851 xmlStopParser (xmlParserCtxtPtr ctxt);
852XMLPUBFUN int XMLCALL
853 xmlPedanticParserDefault(int val);
854XMLPUBFUN int XMLCALL
855 xmlLineNumbersDefault (int val);
856
857#ifdef LIBXML_SAX1_ENABLED
858/*
859 * Recovery mode
860 */
861XMLPUBFUN xmlDocPtr XMLCALL
862 xmlRecoverDoc (const xmlChar *cur);
863XMLPUBFUN xmlDocPtr XMLCALL
864 xmlRecoverMemory (const char *buffer,
865 int size);
866XMLPUBFUN xmlDocPtr XMLCALL
867 xmlRecoverFile (const char *filename);
868#endif /* LIBXML_SAX1_ENABLED */
869
870/*
871 * Less common routines and SAX interfaces
872 */
873XMLPUBFUN int XMLCALL
874 xmlParseDocument (xmlParserCtxtPtr ctxt);
875XMLPUBFUN int XMLCALL
876 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
877#ifdef LIBXML_SAX1_ENABLED
878XMLPUBFUN int XMLCALL
879 xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
880 void *user_data,
881 const char *filename);
882XMLPUBFUN int XMLCALL
883 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
884 void *user_data,
885 const char *buffer,
886 int size);
887XMLPUBFUN xmlDocPtr XMLCALL
888 xmlSAXParseDoc (xmlSAXHandlerPtr sax,
889 const xmlChar *cur,
890 int recovery);
891XMLPUBFUN xmlDocPtr XMLCALL
892 xmlSAXParseMemory (xmlSAXHandlerPtr sax,
893 const char *buffer,
894 int size,
895 int recovery);
896XMLPUBFUN xmlDocPtr XMLCALL
897 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
898 const char *buffer,
899 int size,
900 int recovery,
901 void *data);
902XMLPUBFUN xmlDocPtr XMLCALL
903 xmlSAXParseFile (xmlSAXHandlerPtr sax,
904 const char *filename,
905 int recovery);
906XMLPUBFUN xmlDocPtr XMLCALL
907 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
908 const char *filename,
909 int recovery,
910 void *data);
911XMLPUBFUN xmlDocPtr XMLCALL
912 xmlSAXParseEntity (xmlSAXHandlerPtr sax,
913 const char *filename);
914XMLPUBFUN xmlDocPtr XMLCALL
915 xmlParseEntity (const char *filename);
916#endif /* LIBXML_SAX1_ENABLED */
917
918#ifdef LIBXML_VALID_ENABLED
919XMLPUBFUN xmlDtdPtr XMLCALL
920 xmlSAXParseDTD (xmlSAXHandlerPtr sax,
921 const xmlChar *ExternalID,
922 const xmlChar *SystemID);
923XMLPUBFUN xmlDtdPtr XMLCALL
924 xmlParseDTD (const xmlChar *ExternalID,
925 const xmlChar *SystemID);
926XMLPUBFUN xmlDtdPtr XMLCALL
927 xmlIOParseDTD (xmlSAXHandlerPtr sax,
928 xmlParserInputBufferPtr input,
929 xmlCharEncoding enc);
930#endif /* LIBXML_VALID_ENABLED */
931#ifdef LIBXML_SAX1_ENABLED
932XMLPUBFUN int XMLCALL
933 xmlParseBalancedChunkMemory(xmlDocPtr doc,
934 xmlSAXHandlerPtr sax,
935 void *user_data,
936 int depth,
937 const xmlChar *string,
938 xmlNodePtr *lst);
939#endif /* LIBXML_SAX1_ENABLED */
940XMLPUBFUN xmlParserErrors XMLCALL
941 xmlParseInNodeContext (xmlNodePtr node,
942 const char *data,
943 int datalen,
944 int options,
945 xmlNodePtr *lst);
946#ifdef LIBXML_SAX1_ENABLED
947XMLPUBFUN int XMLCALL
948 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
949 xmlSAXHandlerPtr sax,
950 void *user_data,
951 int depth,
952 const xmlChar *string,
953 xmlNodePtr *lst,
954 int recover);
955XMLPUBFUN int XMLCALL
956 xmlParseExternalEntity (xmlDocPtr doc,
957 xmlSAXHandlerPtr sax,
958 void *user_data,
959 int depth,
960 const xmlChar *URL,
961 const xmlChar *ID,
962 xmlNodePtr *lst);
963#endif /* LIBXML_SAX1_ENABLED */
964XMLPUBFUN int XMLCALL
965 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
966 const xmlChar *URL,
967 const xmlChar *ID,
968 xmlNodePtr *lst);
969
970/*
971 * Parser contexts handling.
972 */
973XMLPUBFUN xmlParserCtxtPtr XMLCALL
974 xmlNewParserCtxt (void);
975XMLPUBFUN int XMLCALL
976 xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
977XMLPUBFUN void XMLCALL
978 xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
979XMLPUBFUN void XMLCALL
980 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
981#ifdef LIBXML_SAX1_ENABLED
982XMLPUBFUN void XMLCALL
983 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
984 const xmlChar* buffer,
985 const char *filename);
986#endif /* LIBXML_SAX1_ENABLED */
987XMLPUBFUN xmlParserCtxtPtr XMLCALL
988 xmlCreateDocParserCtxt (const xmlChar *cur);
989
#ifdef LIBXML_LEGACY_ENABLED
/*
 * Reading/setting optional parsing features.
 *
 * Only declared when legacy interfaces are compiled in. Features are
 * addressed by name (a C string); values travel through untyped
 * void pointers, so the caller must know the type behind each name.
 * All three functions return an int.
 * NOTE(review): return-code convention and the in/out meaning of @len
 * are not visible from this header -- confirm.
 */
XMLPUBFUN int XMLCALL
    xmlGetFeaturesList(int *len,
                       const char **result);
XMLPUBFUN int XMLCALL
    xmlGetFeature(xmlParserCtxtPtr ctxt,
                  const char *name,
                  void *result);
XMLPUBFUN int XMLCALL
    xmlSetFeature(xmlParserCtxtPtr ctxt,
                  const char *name,
                  void *value);
#endif /* LIBXML_LEGACY_ENABLED */

#ifdef LIBXML_PUSH_ENABLED
/*
 * Interfaces for the Push mode.
 *
 * Only declared when push-parser support is compiled in.
 * xmlCreatePushParserCtxt() builds a context from a SAX handler, user
 * data, an initial @chunk of @size bytes and a @filename;
 * xmlParseChunk() then feeds further (@chunk, @size) pairs to that
 * context and takes a @terminate flag.
 * NOTE(review): presumably a non-zero @terminate marks the last chunk and
 * xmlParseChunk() returns 0 on success -- confirm.
 */
XMLPUBFUN xmlParserCtxtPtr XMLCALL
    xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
                            void *user_data,
                            const char *chunk,
                            int size,
                            const char *filename);
XMLPUBFUN int XMLCALL
    xmlParseChunk(xmlParserCtxtPtr ctxt,
                  const char *chunk,
                  int size,
                  int terminate);
#endif /* LIBXML_PUSH_ENABLED */

1024/*
1025 * Special I/O mode.
1026 */
1027
1028XMLPUBFUN xmlParserCtxtPtr XMLCALL
1029 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
1030 void *user_data,
1031 xmlInputReadCallback ioread,
1032 xmlInputCloseCallback ioclose,
1033 void *ioctx,
1034 xmlCharEncoding enc);
1035
1036XMLPUBFUN xmlParserInputPtr XMLCALL
1037 xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
1038 xmlParserInputBufferPtr input,
1039 xmlCharEncoding enc);
1040
1041/*
1042 * Node infos.
1043 */
1044XMLPUBFUN const xmlParserNodeInfo* XMLCALL
1045 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
1046 const xmlNodePtr node);
1047XMLPUBFUN void XMLCALL
1048 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1049XMLPUBFUN void XMLCALL
1050 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1051XMLPUBFUN unsigned long XMLCALL
1052 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1053 const xmlNodePtr node);
1054XMLPUBFUN void XMLCALL
1055 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
1056 const xmlParserNodeInfoPtr info);
1057
1058/*
1059 * External entities handling actually implemented in xmlIO.
1060 */
1061
1062XMLPUBFUN void XMLCALL
1063 xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
1064XMLPUBFUN xmlExternalEntityLoader XMLCALL
1065 xmlGetExternalEntityLoader(void);
1066XMLPUBFUN xmlParserInputPtr XMLCALL
1067 xmlLoadExternalEntity (const char *URL,
1068 const char *ID,
1069 xmlParserCtxtPtr ctxt);
1070
1071/*
1072 * Index lookup, actually implemented in the encoding module
1073 */
1074XMLPUBFUN long XMLCALL
1075 xmlByteConsumed (xmlParserCtxtPtr ctxt);
1076
/*
 * New set of simpler/more flexible APIs
 */
/**
 * xmlParserOption:
 *
 * This is the set of XML parser options that can be passed down
 * to the xmlReadDoc() and similar calls.
 *
 * Each enumerator occupies its own bit (1<<0 .. 1<<20), so several
 * options can be OR-ed together into a single int.
 */
typedef enum {
    XML_PARSE_RECOVER    = 1<<0,  /* recover on errors */
    XML_PARSE_NOENT      = 1<<1,  /* substitute entities */
    XML_PARSE_DTDLOAD    = 1<<2,  /* load the external subset */
    XML_PARSE_DTDATTR    = 1<<3,  /* default DTD attributes */
    XML_PARSE_DTDVALID   = 1<<4,  /* validate with the DTD */
    XML_PARSE_NOERROR    = 1<<5,  /* suppress error reports */
    XML_PARSE_NOWARNING  = 1<<6,  /* suppress warning reports */
    XML_PARSE_PEDANTIC   = 1<<7,  /* pedantic error reporting */
    XML_PARSE_NOBLANKS   = 1<<8,  /* remove blank nodes */
    XML_PARSE_SAX1       = 1<<9,  /* use the SAX1 interface internally */
    XML_PARSE_XINCLUDE   = 1<<10, /* Implement XInclude substitution */
    XML_PARSE_NONET      = 1<<11, /* Forbid network access */
    XML_PARSE_NODICT     = 1<<12, /* Do not reuse the context dictionary */
    XML_PARSE_NSCLEAN    = 1<<13, /* remove redundant namespaces declarations */
    XML_PARSE_NOCDATA    = 1<<14, /* merge CDATA as text nodes */
    XML_PARSE_NOXINCNODE = 1<<15, /* do not generate XINCLUDE START/END nodes */
    XML_PARSE_COMPACT    = 1<<16, /* compact small text nodes; no modification of
                                     the tree allowed afterwards (will possibly
                                     crash if you try to modify the tree) */
    XML_PARSE_OLD10      = 1<<17, /* parse using XML-1.0 before update 5 */
    XML_PARSE_NOBASEFIX  = 1<<18, /* do not fixup XINCLUDE xml:base uris */
    XML_PARSE_HUGE       = 1<<19, /* relax any hardcoded limit from the parser */
    XML_PARSE_OLDSAX     = 1<<20  /* parse using SAX2 interface from before 2.7.0 */
} xmlParserOption;

1112XMLPUBFUN void XMLCALL
1113 xmlCtxtReset (xmlParserCtxtPtr ctxt);
1114XMLPUBFUN int XMLCALL
1115 xmlCtxtResetPush (xmlParserCtxtPtr ctxt,
1116 const char *chunk,
1117 int size,
1118 const char *filename,
1119 const char *encoding);
1120XMLPUBFUN int XMLCALL
1121 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt,
1122 int options);
1123XMLPUBFUN xmlDocPtr XMLCALL
1124 xmlReadDoc (const xmlChar *cur,
1125 const char *URL,
1126 const char *encoding,
1127 int options);
1128XMLPUBFUN xmlDocPtr XMLCALL
1129 xmlReadFile (const char *URL,
1130 const char *encoding,
1131 int options);
1132XMLPUBFUN xmlDocPtr XMLCALL
1133 xmlReadMemory (const char *buffer,
1134 int size,
1135 const char *URL,
1136 const char *encoding,
1137 int options);
1138XMLPUBFUN xmlDocPtr XMLCALL
1139 xmlReadFd (int fd,
1140 const char *URL,
1141 const char *encoding,
1142 int options);
1143XMLPUBFUN xmlDocPtr XMLCALL
1144 xmlReadIO (xmlInputReadCallback ioread,
1145 xmlInputCloseCallback ioclose,
1146 void *ioctx,
1147 const char *URL,
1148 const char *encoding,
1149 int options);
1150XMLPUBFUN xmlDocPtr XMLCALL
1151 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
1152 const xmlChar *cur,
1153 const char *URL,
1154 const char *encoding,
1155 int options);
1156XMLPUBFUN xmlDocPtr XMLCALL
1157 xmlCtxtReadFile (xmlParserCtxtPtr ctxt,
1158 const char *filename,
1159 const char *encoding,
1160 int options);
1161XMLPUBFUN xmlDocPtr XMLCALL
1162 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
1163 const char *buffer,
1164 int size,
1165 const char *URL,
1166 const char *encoding,
1167 int options);
1168XMLPUBFUN xmlDocPtr XMLCALL
1169 xmlCtxtReadFd (xmlParserCtxtPtr ctxt,
1170 int fd,
1171 const char *URL,
1172 const char *encoding,
1173 int options);
1174XMLPUBFUN xmlDocPtr XMLCALL
1175 xmlCtxtReadIO (xmlParserCtxtPtr ctxt,
1176 xmlInputReadCallback ioread,
1177 xmlInputCloseCallback ioclose,
1178 void *ioctx,
1179 const char *URL,
1180 const char *encoding,
1181 int options);
1182
/*
 * Library wide options
 */
/**
 * xmlFeature:
 *
 * Used to examine the existence of features that can be enabled
 * or disabled at compile-time.
 * They used to be called XML_FEATURE_xxx but this clashed with Expat
 *
 * Values are consecutive identifiers starting at 1 (not bit flags);
 * they are meant to be passed one at a time to xmlHasFeature().
 */
typedef enum {
    XML_WITH_THREAD     = 1,
    XML_WITH_TREE       = 2,
    XML_WITH_OUTPUT     = 3,
    XML_WITH_PUSH       = 4,
    XML_WITH_READER     = 5,
    XML_WITH_PATTERN    = 6,
    XML_WITH_WRITER     = 7,
    XML_WITH_SAX1       = 8,
    XML_WITH_FTP        = 9,
    XML_WITH_HTTP       = 10,
    XML_WITH_VALID      = 11,
    XML_WITH_HTML       = 12,
    XML_WITH_LEGACY     = 13,
    XML_WITH_C14N       = 14,
    XML_WITH_CATALOG    = 15,
    XML_WITH_XPATH      = 16,
    XML_WITH_XPTR       = 17,
    XML_WITH_XINCLUDE   = 18,
    XML_WITH_ICONV      = 19,
    XML_WITH_ISO8859X   = 20,
    XML_WITH_UNICODE    = 21,
    XML_WITH_REGEXP     = 22,
    XML_WITH_AUTOMATA   = 23,
    XML_WITH_EXPR       = 24,
    XML_WITH_SCHEMAS    = 25,
    XML_WITH_SCHEMATRON = 26,
    XML_WITH_MODULES    = 27,
    XML_WITH_DEBUG      = 28,
    XML_WITH_DEBUG_MEM  = 29,
    XML_WITH_DEBUG_RUN  = 30,
    XML_WITH_ZLIB       = 31,
    XML_WITH_ICU        = 32,
    XML_WITH_NONE       = 99999 /* just to be sure of allocation size */
} xmlFeature;

1229XMLPUBFUN int XMLCALL
1230 xmlHasFeature (xmlFeature feature);
1231
1232#ifdef __cplusplus
1233}
1234#endif
1235#endif /* __XML_PARSER_H__ */