Back to Turicreate

Module HTMLparser from libxml2

deps/src/libxml2-2.9.1/doc/html/libxml-HTMLparser.html

6.4.1 · 31.2 KB
Original Source

| |

|

|

|

Module HTMLparser from libxml2

|

|

|

|

|

|

| API Menu | |

|

| API Indexes | |

|

| Related links | |

|

|

|

|

|

|

| | DOCBparser | | API documentation | | The XML C parser and toolkit of Gnome | HTMLtree | |

This module implements an HTML 4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse "real world" HTML, even if severely broken from a specification point of view.

Table of Contents

#define[htmlDefaultSubelement](#htmlDefaultSubelement)
#define[htmlElementAllowedHereDesc](#htmlElementAllowedHereDesc)
#define[htmlRequiredAttrs](#htmlRequiredAttrs)
Typedef[xmlDocPtr](libxml-tree.html#xmlDocPtr)htmlDocPtr
Structure[htmlElemDesc](#htmlElemDesc)
struct _htmlElemDesc
Typedef[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)*htmlElemDescPtr
Structure[htmlEntityDesc](#htmlEntityDesc)
struct _htmlEntityDesc
Typedef[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*htmlEntityDescPtr
Typedef[xmlNodePtr](libxml-tree.html#xmlNodePtr)htmlNodePtr
Typedef[xmlParserCtxt](libxml-tree.html#xmlParserCtxt)htmlParserCtxt
Typedef[xmlParserCtxtPtr](libxml-tree.html#xmlParserCtxtPtr)htmlParserCtxtPtr
Typedef[xmlParserInput](libxml-tree.html#xmlParserInput)htmlParserInput
Typedef[xmlParserInputPtr](libxml-tree.html#xmlParserInputPtr)htmlParserInputPtr
Typedef[xmlParserNodeInfo](libxml-parser.html#xmlParserNodeInfo)htmlParserNodeInfo
Enum[htmlParserOption](#htmlParserOption)
Typedef[xmlSAXHandler](libxml-tree.html#xmlSAXHandler)htmlSAXHandler
Typedef[xmlSAXHandlerPtr](libxml-tree.html#xmlSAXHandlerPtr)htmlSAXHandlerPtr
Enum[htmlStatus](#htmlStatus)
int[UTF8ToHtml](#UTF8ToHtml)(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen)
[htmlStatus](libxml-HTMLparser.html#htmlStatus)[htmlAttrAllowed](#htmlAttrAllowed)(const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* elt,
const[xmlChar](libxml-xmlstring.html#xmlChar)* attr,
int legacy)
int[htmlAutoCloseTag](#htmlAutoCloseTag)([htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)doc,
const[xmlChar](libxml-xmlstring.html#xmlChar)* name,
[htmlNodePtr](libxml-HTMLparser.html#htmlNodePtr)elem)
[htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)[htmlCreateMemoryParserCtxt](#htmlCreateMemoryParserCtxt)(const char * buffer,
int size)
[htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)[htmlCreatePushParserCtxt](#htmlCreatePushParserCtxt)([htmlSAXHandlerPtr](libxml-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
[xmlCharEncoding](libxml-encoding.html#xmlCharEncoding)enc)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlCtxtReadDoc](#htmlCtxtReadDoc)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlCtxtReadFd](#htmlCtxtReadFd)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int fd,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlCtxtReadFile](#htmlCtxtReadFile)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * filename,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlCtxtReadIO](#htmlCtxtReadIO)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
[xmlInputReadCallback](libxml-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlCtxtReadMemory](#htmlCtxtReadMemory)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
void[htmlCtxtReset](#htmlCtxtReset)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)
int[htmlCtxtUseOptions](#htmlCtxtUseOptions)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int options)
int[htmlElementAllowedHere](#htmlElementAllowedHere)(const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* parent,
const[xmlChar](libxml-xmlstring.html#xmlChar)* elt)
[htmlStatus](libxml-HTMLparser.html#htmlStatus)[htmlElementStatusHere](#htmlElementStatusHere)(const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* parent,
const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* elt)
int[htmlEncodeEntities](#htmlEncodeEntities)(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)
const[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*[htmlEntityLookup](#htmlEntityLookup)(const[xmlChar](libxml-xmlstring.html#xmlChar)* name)
const[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*[htmlEntityValueLookup](#htmlEntityValueLookup)(unsigned int value)
void[htmlFreeParserCtxt](#htmlFreeParserCtxt)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)
int[htmlHandleOmittedElem](#htmlHandleOmittedElem)(int val)
int[htmlIsAutoClosed](#htmlIsAutoClosed)([htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)doc,
[htmlNodePtr](libxml-HTMLparser.html#htmlNodePtr)elem)
int[htmlIsScriptAttribute](#htmlIsScriptAttribute)(const[xmlChar](libxml-xmlstring.html#xmlChar)* name)
[htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)[htmlNewParserCtxt](#htmlNewParserCtxt)(void)
[htmlStatus](libxml-HTMLparser.html#htmlStatus)[htmlNodeStatus](#htmlNodeStatus)(const[htmlNodePtr](libxml-HTMLparser.html#htmlNodePtr)node,
int legacy)
int[htmlParseCharRef](#htmlParseCharRef)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)
int[htmlParseChunk](#htmlParseChunk)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * chunk,
int size,
int terminate)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlParseDoc](#htmlParseDoc)([xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * encoding)
int[htmlParseDocument](#htmlParseDocument)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)
void[htmlParseElement](#htmlParseElement)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)
const[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*[htmlParseEntityRef](#htmlParseEntityRef)([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml-xmlstring.html#xmlChar)** str)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlParseFile](#htmlParseFile)(const char * filename,
const char * encoding)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlReadDoc](#htmlReadDoc)(const[xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlReadFd](#htmlReadFd)(int fd,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlReadFile](#htmlReadFile)(const char * filename,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlReadIO](#htmlReadIO)([xmlInputReadCallback](libxml-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlReadMemory](#htmlReadMemory)(const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlSAXParseDoc](#htmlSAXParseDoc)([xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * encoding,
[htmlSAXHandlerPtr](libxml-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)
[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)[htmlSAXParseFile](#htmlSAXParseFile)(const char * filename,
const char * encoding,
[htmlSAXHandlerPtr](libxml-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)
const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)*[htmlTagLookup](#htmlTagLookup)(const[xmlChar](libxml-xmlstring.html#xmlChar)* tag)

Description

Macro: htmlDefaultSubelement

#define htmlDefaultSubelement

Returns the default subelement for this element

Macro: htmlElementAllowedHereDesc

#define htmlElementAllowedHereDesc

Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.

Macro: htmlRequiredAttrs

#define htmlRequiredAttrs

Returns the attributes required for the specified element.

Structure htmlElemDesc

Structure htmlElemDesc
struct _htmlElemDesc {
    const char *	name	: The tag name
    char	startTag	: Whether the start tag can be implied
    char	endTag	: Whether the end tag can be implied
    char	saveEndTag	: Whether the end tag should be saved
    char	empty	: Is this an empty element ?
    char	depr	: Is this a deprecated element ?
    char	dtd	: 1: only in Loose DTD, 2: only Frameset
    char	isinline	: is this a block 0 or inline 1 element
    const char *	desc	: the description (NRK Jan.2003: the fields below are new and encapsulate HTML structure)
    const char **	subelts	: allowed sub-elements of this element
    const char *	defaultsubelt	: subelement for suggested auto-repair if necessary, or NULL
    const char **	attrs_opt	: Optional Attributes
    const char **	attrs_depr	: Additional deprecated attributes
    const char **	attrs_req	: Required attributes
}

Structure htmlEntityDesc

Structure htmlEntityDesc
struct _htmlEntityDesc {
    unsigned int	value	: the UNICODE value for the character
    const char *	name	: The entity name
    const char *	desc	: the description
}

Enum htmlParserOption

Enum htmlParserOption {
HTML\_PARSE\_RECOVER = 1 : Relaxed parsing
HTML\_PARSE\_NODEFDTD = 4 : do not default a doctype if not found
HTML\_PARSE\_NOERROR = 32 : suppress error reports
HTML\_PARSE\_NOWARNING = 64 : suppress warning reports
HTML\_PARSE\_PEDANTIC = 128 : pedantic error reporting
HTML\_PARSE\_NOBLANKS = 256 : remove blank nodes
HTML\_PARSE\_NONET = 2048 : Forbid network access
HTML\_PARSE\_NOIMPLIED = 8192 : Do not add implied html/body... elements
HTML\_PARSE\_COMPACT = 65536 : compact small text nodes
HTML\_PARSE\_IGNORE\_ENC = 2097152 : ignore internal document encoding hint
}

Enum htmlStatus

Enum htmlStatus {
HTML\_NA = 0 : something we don't check at all
HTML\_INVALID = 1
HTML\_DEPRECATED = 2
HTML\_VALID = 4
HTML\_REQUIRED = 12 : VALID bit set so ( & [HTML\_VALID](libxml-HTMLparser.html#HTML_VALID) ) is TRUE
}

Function: UTF8ToHtml

int	UTF8ToHtml (unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen)

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

| out: | a pointer to an array of bytes to store the result | | outlen: | the length of @out | | in: | a pointer to an array of UTF-8 chars | | inlen: | the length of @in | | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is non-negative, else unpredictable. The value of @outlen after return is the number of octets produced. |

Function: htmlAttrAllowed

[htmlStatus](libxml-HTMLparser.html#htmlStatus)htmlAttrAllowed (const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* elt,
const[xmlChar](libxml-xmlstring.html#xmlChar)* attr,
int legacy)

Checks whether an attribute is valid for an element. Has full knowledge of Required and Deprecated attributes.

| elt: | HTML element | | attr: | HTML attribute | | legacy: | whether to allow deprecated attributes | | Returns: | one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID |

Function: htmlAutoCloseTag

int	htmlAutoCloseTag ([htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)doc,
const[xmlChar](libxml-xmlstring.html#xmlChar)* name,
[htmlNodePtr](libxml-HTMLparser.html#htmlNodePtr)elem)

The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.

| doc: | the HTML document | | name: | The tag name | | elem: | the HTML element | | Returns: | 1 if autoclose, 0 otherwise |

Function: htmlCreateMemoryParserCtxt

[htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)htmlCreateMemoryParserCtxt	(const char * buffer,
int size)

Create a parser context for an HTML in-memory document.

| buffer: | a pointer to a char array | | size: | the size of the array | | Returns: | the new parser context or NULL |

Function: htmlCreatePushParserCtxt

[htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)htmlCreatePushParserCtxt	([htmlSAXHandlerPtr](libxml-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
[xmlCharEncoding](libxml-encoding.html#xmlCharEncoding)enc)

Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.

| sax: | a SAX handler | | user_data: | The user data returned on SAX callbacks | | chunk: | a pointer to an array of chars | | size: | number of chars in the array | | filename: | an optional file name or URI | | enc: | an optional encoding | | Returns: | the new parser context or NULL |

Function: htmlCtxtReadDoc

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlCtxtReadDoc ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)

parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | cur: | a pointer to a zero terminated string | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlCtxtReadFd

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlCtxtReadFd ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int fd,
const char * URL,
const char * encoding,
int options)

parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | fd: | an open file descriptor | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlCtxtReadFile

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlCtxtReadFile	([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * filename,
const char * encoding,
int options)

parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | filename: | a file or URL | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlCtxtReadIO

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlCtxtReadIO ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
[xmlInputReadCallback](libxml-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)

parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context

| ctxt: | an HTML parser context | | ioread: | an I/O read function | | ioclose: | an I/O close function | | ioctx: | an I/O handler | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlCtxtReadMemory

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlCtxtReadMemory	([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)

parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | buffer: | a pointer to a char array | | size: | the size of the array | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlCtxtReset

void	htmlCtxtReset ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)

Reset a parser context

| ctxt: | an HTML parser context |

Function: htmlCtxtUseOptions

int	htmlCtxtUseOptions ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int options)

Applies the options to the parser context

| ctxt: | an HTML parser context | | options: | a combination of htmlParserOption(s) | | Returns: | 0 in case of success, the set of unknown or unimplemented options in case of error. |

Function: htmlElementAllowedHere

int	htmlElementAllowedHere (const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* parent,
const[xmlChar](libxml-xmlstring.html#xmlChar)* elt)

Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements

| parent: | HTML parent element | | elt: | HTML element | | Returns: | 1 if allowed; 0 otherwise. |

Function: htmlElementStatusHere

[htmlStatus](libxml-HTMLparser.html#htmlStatus)htmlElementStatusHere	(const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* parent,
const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)* elt)

Checks whether an HTML element may be a direct child of a parent element. and if so whether it is valid or deprecated.

| parent: | HTML parent element | | elt: | HTML element | | Returns: | one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID |

Function: htmlEncodeEntities

int	htmlEncodeEntities (unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

| out: | a pointer to an array of bytes to store the result | | outlen: | the length of @out | | in: | a pointer to an array of UTF-8 chars | | inlen: | the length of @in | | quoteChar: | the quote character to escape (' or ") or zero. | | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is non-negative, else unpredictable. The value of @outlen after return is the number of octets produced. |

Function: htmlEntityLookup

const[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*	htmlEntityLookup	(const[xmlChar](libxml-xmlstring.html#xmlChar)* name)

Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.

| name: | the entity name | | Returns: | the associated htmlEntityDescPtr if found, NULL otherwise. |

Function: htmlEntityValueLookup

const[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*	htmlEntityValueLookup	(unsigned int value)

Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.

| value: | the entity's unicode value | | Returns: | the associated htmlEntityDescPtr if found, NULL otherwise. |

Function: htmlFreeParserCtxt

void	htmlFreeParserCtxt ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)

Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.

| ctxt: | an HTML parser context |

Function: htmlHandleOmittedElem

int	htmlHandleOmittedElem (int val)

Set and return the previous value for handling HTML omitted tags.

| val: | int 0 or 1 | | Returns: | the last value: 0 for no handling, 1 for auto insertion. |

Function: htmlIsAutoClosed

int	htmlIsAutoClosed ([htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)doc,
[htmlNodePtr](libxml-HTMLparser.html#htmlNodePtr)elem)

The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children.

| doc: | the HTML document | | elem: | the HTML element | | Returns: | 1 if autoclosed, 0 otherwise |

Function: htmlIsScriptAttribute

int	htmlIsScriptAttribute (const[xmlChar](libxml-xmlstring.html#xmlChar)* name)

Check if an attribute is of content type Script

| name: | an attribute name | | Returns: | 1 if the attribute is a script, 0 otherwise |

Function: htmlNewParserCtxt

[htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)htmlNewParserCtxt	(void)

Allocate and initialize a new parser context.

| Returns: | the htmlParserCtxtPtr or NULL in case of allocation error |

Function: htmlNodeStatus

[htmlStatus](libxml-HTMLparser.html#htmlStatus)htmlNodeStatus (const[htmlNodePtr](libxml-HTMLparser.html#htmlNodePtr)node,
int legacy)

Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)

| node: | an htmlNodePtr in a tree | | legacy: | whether to allow deprecated elements (YES is faster here for Element nodes) | | Returns: | for Element nodes, a return from htmlElementAllowedHere (if legacy allowed) or htmlElementStatusHere (otherwise); for Attribute nodes, a return from htmlAttrAllowed; for other nodes, HTML_NA (no checks performed) |

Function: htmlParseCharRef

int	htmlParseCharRef ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)

parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'

| ctxt: | an HTML parser context | | Returns: | the value parsed (as an int) |

Function: htmlParseChunk

int	htmlParseChunk ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * chunk,
int size,
int terminate)

Parse a Chunk of memory

| ctxt: | an HTML parser context | | chunk: | a char array | | size: | the size in bytes of the chunk | | terminate: | last chunk indicator | | Returns: | zero if no error, the xmlParserErrors otherwise. |

Function: htmlParseDoc

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlParseDoc ([xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * encoding)

parse an HTML in-memory document and build a tree.

| cur: | a pointer to an array of xmlChar | | encoding: | a free form C string describing the HTML document encoding, or NULL | | Returns: | the resulting document tree |

Function: htmlParseDocument

int	htmlParseDocument ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)

parse an HTML document (and build a tree if using the standard SAX interface).

| ctxt: | an HTML parser context | | Returns: | 0 on success, -1 in case of error. The parser context is augmented as a result of the parsing. |

Function: htmlParseElement

void	htmlParseElement ([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt)

parse an HTML element; this is highly recursive. This is kept for compatibility with previous code versions. [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue

| ctxt: | an HTML parser context |

Function: htmlParseEntityRef

const[htmlEntityDesc](libxml-HTMLparser.html#htmlEntityDesc)*	htmlParseEntityRef	([htmlParserCtxtPtr](libxml-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml-xmlstring.html#xmlChar)** str)

parse an HTML entity reference. [68] EntityRef ::= '&' Name ';'

| ctxt: | an HTML parser context | | str: | location to store the entity name | | Returns: | the associated htmlEntityDescPtr if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller. |

Function: htmlParseFile

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlParseFile (const char * filename,
const char * encoding)

parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.

| filename: | the filename | | encoding: | a free form C string describing the HTML document encoding, or NULL | | Returns: | the resulting document tree |

Function: htmlReadDoc

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlReadDoc (const[xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)

parse an HTML in-memory document and build a tree.

| cur: | a pointer to a zero terminated string | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlReadFd

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlReadFd (int fd,
const char * URL,
const char * encoding,
int options)

parse an HTML document from a file descriptor and build a tree.

| fd: | an open file descriptor | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlReadFile

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlReadFile (const char * filename,
const char * encoding,
int options)

parse an HTML file from the filesystem or the network.

| filename: | a file or URL | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlReadIO

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlReadIO ([xmlInputReadCallback](libxml-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)

parse an HTML document from I/O functions and source and build a tree.

| ioread: | an I/O read function | | ioclose: | an I/O close function | | ioctx: | an I/O handler | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlReadMemory

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlReadMemory (const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)

parse an HTML in-memory document and build a tree.

| buffer: | a pointer to a char array | | size: | the size of the array | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |

Function: htmlSAXParseDoc

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlSAXParseDoc ([xmlChar](libxml-xmlstring.html#xmlChar)* cur,
const char * encoding,
[htmlSAXHandlerPtr](libxml-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)

Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.

| cur: | a pointer to an array of xmlChar | | encoding: | a free form C string describing the HTML document encoding, or NULL | | sax: | the SAX handler block | | userData: | if using SAX, this pointer will be provided on callbacks. | | Returns: | the resulting document tree unless SAX is NULL or the document is not well formed. |

Function: htmlSAXParseFile

[htmlDocPtr](libxml-HTMLparser.html#htmlDocPtr)htmlSAXParseFile	(const char * filename,
const char * encoding,
[htmlSAXHandlerPtr](libxml-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)

parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.

| filename: | the filename | | encoding: | a free form C string describing the HTML document encoding, or NULL | | sax: | the SAX handler block | | userData: | if using SAX, this pointer will be provided on callbacks. | | Returns: | the resulting document tree unless SAX is NULL or the document is not well formed. |

Function: htmlTagLookup

const[htmlElemDesc](libxml-HTMLparser.html#htmlElemDesc)*	htmlTagLookup	(const[xmlChar](libxml-xmlstring.html#xmlChar)* tag)

Lookup the HTML tag in the ElementTable

| tag: | The tag name in lowercase | | Returns: | the related htmlElemDescPtr or NULL if not found. |

Daniel Veillard

|

|

|

|

|