deps/src/libxml2-2.9.1/doc/devhelp/libxml2-HTMLparser.html
| | | | | libxml2 Reference Manual |
HTMLparser - interface for an HTML 4.0 non-verifying parser
This module implements an HTML 4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse "real world" HTML, even if severely broken from a specification point of view.
Author(s): Daniel Veillard
#define[htmlDefaultSubelement](#htmlDefaultSubelement)(elt);
#define[htmlElementAllowedHereDesc](#htmlElementAllowedHereDesc)(parent, elt);
#define[htmlRequiredAttrs](#htmlRequiredAttrs)(elt);
typedef[xmlParserNodeInfo](libxml2-parser.html#xmlParserNodeInfo)[htmlParserNodeInfo](#htmlParserNodeInfo);
typedef[xmlParserInput](libxml2-tree.html#xmlParserInput)[htmlParserInput](#htmlParserInput);
typedef[xmlParserCtxtPtr](libxml2-tree.html#xmlParserCtxtPtr)[htmlParserCtxtPtr](#htmlParserCtxtPtr);
typedef struct _htmlEntityDesc[htmlEntityDesc](#htmlEntityDesc);
typedef[xmlDocPtr](libxml2-tree.html#xmlDocPtr)[htmlDocPtr](#htmlDocPtr);
typedef[xmlSAXHandlerPtr](libxml2-tree.html#xmlSAXHandlerPtr)[htmlSAXHandlerPtr](#htmlSAXHandlerPtr);
typedef enum[htmlStatus](#htmlStatus);
typedef[xmlNodePtr](libxml2-tree.html#xmlNodePtr)[htmlNodePtr](#htmlNodePtr);
typedef[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)*[htmlElemDescPtr](#htmlElemDescPtr);
typedef struct _htmlElemDesc[htmlElemDesc](#htmlElemDesc);
typedef[xmlSAXHandler](libxml2-tree.html#xmlSAXHandler)[htmlSAXHandler](#htmlSAXHandler);
typedef[xmlParserInputPtr](libxml2-tree.html#xmlParserInputPtr)[htmlParserInputPtr](#htmlParserInputPtr);
typedef enum[htmlParserOption](#htmlParserOption);
typedef[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlEntityDescPtr](#htmlEntityDescPtr);
typedef[xmlParserCtxt](libxml2-tree.html#xmlParserCtxt)[htmlParserCtxt](#htmlParserCtxt);
int[htmlIsScriptAttribute](#htmlIsScriptAttribute)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* name);
int[htmlHandleOmittedElem](#htmlHandleOmittedElem)(int val);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadFd](#htmlReadFd)(int fd,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadIO](#htmlReadIO)([xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlParseFile](#htmlParseFile)(const char * filename,
const char * encoding);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadDoc](#htmlCtxtReadDoc)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options);
int[htmlAutoCloseTag](#htmlAutoCloseTag)([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* name,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem);
int[htmlParseChunk](#htmlParseChunk)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * chunk,
int size,
int terminate);
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)*[htmlTagLookup](#htmlTagLookup)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* tag);[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)[htmlCreateMemoryParserCtxt](#htmlCreateMemoryParserCtxt)(const char * buffer,
int size);
void[htmlCtxtReset](#htmlCtxtReset)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);
int[htmlElementAllowedHere](#htmlElementAllowedHere)(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* elt);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadIO](#htmlCtxtReadIO)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
[xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options);[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)[htmlCreatePushParserCtxt](#htmlCreatePushParserCtxt)([htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
[xmlCharEncoding](libxml2-encoding.html#xmlCharEncoding)enc);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadMemory](#htmlReadMemory)(const char * buffer,
int size,
const char * URL,
const char * encoding,
int options);
int[htmlIsAutoClosed](#htmlIsAutoClosed)([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem);
int[htmlParseCharRef](#htmlParseCharRef)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadDoc](#htmlReadDoc)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options);
int[htmlEncodeEntities](#htmlEncodeEntities)(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar);[htmlStatus](libxml2-HTMLparser.html#htmlStatus)[htmlNodeStatus](#htmlNodeStatus)(const[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)node,
int legacy);[htmlStatus](libxml2-HTMLparser.html#htmlStatus)[htmlAttrAllowed](#htmlAttrAllowed)(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* attr,
int legacy);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlSAXParseFile](#htmlSAXParseFile)(const char * filename,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData);
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlParseEntityRef](#htmlParseEntityRef)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)** str);[htmlStatus](libxml2-HTMLparser.html#htmlStatus)[htmlElementStatusHere](#htmlElementStatusHere)(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt);
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlEntityValueLookup](#htmlEntityValueLookup)(unsigned int value);
void[htmlParseElement](#htmlParseElement)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);
int[UTF8ToHtml](#UTF8ToHtml)(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen);
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlEntityLookup](#htmlEntityLookup)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* name);
void[htmlFreeParserCtxt](#htmlFreeParserCtxt)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadMemory](#htmlCtxtReadMemory)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadFd](#htmlCtxtReadFd)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int fd,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadFile](#htmlReadFile)(const char * filename,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadFile](#htmlCtxtReadFile)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * filename,
const char * encoding,
int options);
int[htmlParseDocument](#htmlParseDocument)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)[htmlNewParserCtxt](#htmlNewParserCtxt)(void);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlSAXParseDoc](#htmlSAXParseDoc)([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData);
int[htmlCtxtUseOptions](#htmlCtxtUseOptions)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlParseDoc](#htmlParseDoc)([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding);
#define[htmlDefaultSubelement](#htmlDefaultSubelement)(elt);
Returns the default subelement for this element
| elt: | HTML element |
#define[htmlElementAllowedHereDesc](#htmlElementAllowedHereDesc)(parent, elt);
Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.
| parent: | HTML parent element | | elt: | HTML element |
#define[htmlRequiredAttrs](#htmlRequiredAttrs)(elt);
Returns the attributes required for the specified element.
| elt: | HTML element |
[xmlDocPtr](libxml2-tree.html#xmlDocPtr)htmlDocPtr;
struct _htmlElemDesc {
const char * name : The tag name
char startTag : Whether the start tag can be implied
char endTag : Whether the end tag can be implied
char saveEndTag : Whether the end tag should be saved
char empty : Is this an empty element ?
char depr : Is this a deprecated element ?
char dtd : 1: only in Loose DTD, 2: only Frameset one
char isinline : is this a block 0 or inline 1 element
const char * desc : the description. NRK Jan.2003: New fields encapsulating HTML structure
const char ** subelts : allowed sub-elements of this element
const char * defaultsubelt : subelement for suggested auto-repair if necessary or NULL
const char ** attrs_opt : Optional Attributes
const char ** attrs_depr : Additional deprecated attributes
const char ** attrs_req : Required attributes
} htmlElemDesc;
[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* htmlElemDescPtr;
struct _htmlEntityDesc {
unsigned int value : the UNICODE value for the character
const char * name : The entity name
const char * desc : the description
} htmlEntityDesc;
[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)* htmlEntityDescPtr;
[xmlNodePtr](libxml2-tree.html#xmlNodePtr)htmlNodePtr;
[xmlParserCtxt](libxml2-tree.html#xmlParserCtxt)htmlParserCtxt;
[xmlParserCtxtPtr](libxml2-tree.html#xmlParserCtxtPtr)htmlParserCtxtPtr;
[xmlParserInput](libxml2-tree.html#xmlParserInput)htmlParserInput;
[xmlParserInputPtr](libxml2-tree.html#xmlParserInputPtr)htmlParserInputPtr;
[xmlParserNodeInfo](libxml2-parser.html#xmlParserNodeInfo)htmlParserNodeInfo;
enum[htmlParserOption](#htmlParserOption){
HTML\_PARSE\_RECOVER= 1 /* Relaxed parsing */
HTML\_PARSE\_NODEFDTD= 4 /* do not default a doctype if not found */
HTML\_PARSE\_NOERROR= 32 /* suppress error reports */
HTML\_PARSE\_NOWARNING= 64 /* suppress warning reports */
HTML\_PARSE\_PEDANTIC= 128 /* pedantic error reporting */
HTML\_PARSE\_NOBLANKS= 256 /* remove blank nodes */
HTML\_PARSE\_NONET= 2048 /* Forbid network access */
HTML\_PARSE\_NOIMPLIED= 8192 /* Do not add implied html/body... elements */
HTML\_PARSE\_COMPACT= 65536 /* compact small text nodes */
HTML\_PARSE\_IGNORE\_ENC= 2097152 /* ignore internal document encoding hint */
};
[xmlSAXHandler](libxml2-tree.html#xmlSAXHandler)htmlSAXHandler;
[xmlSAXHandlerPtr](libxml2-tree.html#xmlSAXHandlerPtr)htmlSAXHandlerPtr;
enum[htmlStatus](#htmlStatus){
HTML\_NA= 0 /* something we don't check at all */
HTML\_INVALID= 1
HTML\_DEPRECATED= 2
HTML\_VALID= 4
HTML\_REQUIRED= 12 /* VALID bit set so ( & HTML_VALID ) is TRUE */
};
int UTF8ToHtml (unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen)
Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
| out: | a pointer to an array of bytes to store the result | | outlen: | the length of @out | | in: | a pointer to an array of UTF-8 chars | | inlen: | the length of @in | | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced. |
[htmlStatus](libxml2-HTMLparser.html#htmlStatus)htmlAttrAllowed (const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* attr,
int legacy)
Checks whether an attribute is valid for an element. Has full knowledge of Required and Deprecated attributes.
| elt: | HTML element | | attr: | HTML attribute | | legacy: | whether to allow deprecated attributes | | Returns: | one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID |
int htmlAutoCloseTag ([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* name,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem)
The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.
| doc: | the HTML document | | name: | The tag name | | elem: | the HTML element | | Returns: | 1 if autoclose, 0 otherwise |
[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)htmlCreateMemoryParserCtxt (const char * buffer,
int size)
Create a parser context for an HTML in-memory document.
| buffer: | a pointer to a char array | | size: | the size of the array | | Returns: | the new parser context or NULL |
[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)htmlCreatePushParserCtxt ([htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
[xmlCharEncoding](libxml2-encoding.html#xmlCharEncoding)enc)
Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.
| sax: | a SAX handler | | user_data: | The user data returned on SAX callbacks | | chunk: | a pointer to an array of chars | | size: | number of chars in the array | | filename: | an optional file name or URI | | enc: | an optional encoding | | Returns: | the new parser context or NULL |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadDoc ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
| ctxt: | an HTML parser context | | cur: | a pointer to a zero terminated string | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadFd ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int fd,
const char * URL,
const char * encoding,
int options)
Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.
| ctxt: | an HTML parser context | | fd: | an open file descriptor | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadFile ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * filename,
const char * encoding,
int options)
Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.
| ctxt: | an HTML parser context | | filename: | a file or URL | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadIO ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
[xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context
| ctxt: | an HTML parser context | | ioread: | an I/O read function | | ioclose: | an I/O close function | | ioctx: | an I/O handler | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadMemory ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
| ctxt: | an HTML parser context | | buffer: | a pointer to a char array | | size: | the size of the array | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
void htmlCtxtReset ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)
Reset a parser context
| ctxt: | an HTML parser context |
int htmlCtxtUseOptions ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int options)
Applies the options to the parser context
| ctxt: | an HTML parser context | | options: | a combination of htmlParserOption(s) | | Returns: | 0 in case of success, the set of unknown or unimplemented options in case of error. |
int htmlElementAllowedHere (const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* elt)
Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements
| parent: | HTML parent element | | elt: | HTML element | | Returns: | 1 if allowed; 0 otherwise. |
[htmlStatus](libxml2-HTMLparser.html#htmlStatus)htmlElementStatusHere (const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt)
Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.
| parent: | HTML parent element | | elt: | HTML element | | Returns: | one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID |
int htmlEncodeEntities (unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)
Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
| out: | a pointer to an array of bytes to store the result | | outlen: | the length of @out | | in: | a pointer to an array of UTF-8 chars | | inlen: | the length of @in | | quoteChar: | the quote character to escape (' or ") or zero. | | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced. |
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)* htmlEntityLookup (const[xmlChar](libxml2-xmlstring.html#xmlChar)* name)
Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.
| name: | the entity name | | Returns: | the associated htmlEntityDescPtr if found, NULL otherwise. |
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)* htmlEntityValueLookup (unsigned int value)
Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.
| value: | the entity's unicode value | | Returns: | the associated htmlEntityDescPtr if found, NULL otherwise. |
void htmlFreeParserCtxt ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)
Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.
| ctxt: | an HTML parser context |
int htmlHandleOmittedElem (int val)
Set and return the previous value for handling HTML omitted tags.
| val: | int 0 or 1 | | Returns: | the previous value: 0 for no handling, 1 for auto insertion. |
int htmlIsAutoClosed ([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem)
The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children.
| doc: | the HTML document | | elem: | the HTML element | | Returns: | 1 if autoclosed, 0 otherwise |
int htmlIsScriptAttribute (const[xmlChar](libxml2-xmlstring.html#xmlChar)* name)
Check if an attribute is of content type Script
| name: | an attribute name | | Returns: | 1 if the attribute is a script, 0 otherwise |
[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)htmlNewParserCtxt (void)
Allocate and initialize a new parser context.
| Returns: | the htmlParserCtxtPtr or NULL in case of allocation error |
[htmlStatus](libxml2-HTMLparser.html#htmlStatus)htmlNodeStatus (const[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)node,
int legacy)
Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)
| node: | an htmlNodePtr in a tree | | legacy: | whether to allow deprecated elements (YES is faster here for Element nodes) | | Returns: | for Element nodes, a return from htmlElementAllowedHere (if legacy allowed) or htmlElementStatusHere (otherwise); for Attribute nodes, a return from htmlAttrAllowed; for other nodes, HTML_NA (no checks performed) |
int htmlParseCharRef ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)
parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
| ctxt: | an HTML parser context | | Returns: | the value parsed (as an int) |
int htmlParseChunk ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * chunk,
int size,
int terminate)
Parse a Chunk of memory
| ctxt: | an HTML parser context | | chunk: | a char array | | size: | the size in bytes of the chunk | | terminate: | last chunk indicator | | Returns: | zero if no error, the xmlParserErrors otherwise. |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlParseDoc ([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding)
parse an HTML in-memory document and build a tree.
| cur: | a pointer to an array of xmlChar | | encoding: | a free form C string describing the HTML document encoding, or NULL | | Returns: | the resulting document tree |
int htmlParseDocument ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)
parse an HTML document (and build a tree if using the standard SAX interface).
| ctxt: | an HTML parser context | | Returns: | 0, or -1 in case of error. The parser context is augmented as a result of the parsing. |
void htmlParseElement ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)
Parse an HTML element; this is highly recursive. This is kept for compatibility with previous code versions. [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue
| ctxt: | an HTML parser context |
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)* htmlParseEntityRef ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)** str)
Parse an HTML entity reference. [68] EntityRef ::= '&' Name ';'
| ctxt: | an HTML parser context | | str: | location to store the entity name | | Returns: | the associated htmlEntityDescPtr if found, or NULL otherwise; if non-NULL, *str will have to be freed by the caller. |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlParseFile (const char * filename,
const char * encoding)
parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
| filename: | the filename | | encoding: | a free form C string describing the HTML document encoding, or NULL | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadDoc (const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree.
| cur: | a pointer to a zero terminated string | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadFd (int fd,
const char * URL,
const char * encoding,
int options)
Parse an HTML document from a file descriptor and build a tree.
| fd: | an open file descriptor | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadFile (const char * filename,
const char * encoding,
int options)
Parse an HTML file from the filesystem or the network.
| filename: | a file or URL | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadIO ([xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
parse an HTML document from I/O functions and source and build a tree.
| ioread: | an I/O read function | | ioclose: | an I/O close function | | ioctx: | an I/O handler | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadMemory (const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree.
| buffer: | a pointer to a char array | | size: | the size of the array | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlSAXParseDoc ([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)
Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fall back to the default DOM behavior and return a tree.
| cur: | a pointer to an array of xmlChar | | encoding: | a free form C string describing the HTML document encoding, or NULL | | sax: | the SAX handler block | | userData: | if using SAX, this pointer will be provided on callbacks. | | Returns: | the resulting document tree unless SAX is NULL or the document is not well formed. |
[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlSAXParseFile (const char * filename,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)
Parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fall back to the default DOM tree building routines.
| filename: | the filename | | encoding: | a free form C string describing the HTML document encoding, or NULL | | sax: | the SAX handler block | | userData: | if using SAX, this pointer will be provided on callbacks. | | Returns: | the resulting document tree unless SAX is NULL or the document is not well formed. |
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* htmlTagLookup (const[xmlChar](libxml2-xmlstring.html#xmlChar)* tag)
Lookup the HTML tag in the ElementTable
| tag: | The tag name in lowercase | | Returns: | the related htmlElemDescPtr or NULL if not found. |