Back to Turicreate

HTMLparser: interface for an HTML 4.0 non-verifying parser

deps/src/libxml2-2.9.1/doc/devhelp/libxml2-HTMLparser.html

6.4.131.2 KB
Original Source

| | | | | libxml2 Reference Manual |

HTMLparser

HTMLparser - interface for an HTML 4.0 non-verifying parser

This module implements an HTML 4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse "real world" HTML, even if severely broken from a specification point of view.

Author(s): Daniel Veillard

Synopsis

#define[htmlDefaultSubelement](#htmlDefaultSubelement)(elt);
#define[htmlElementAllowedHereDesc](#htmlElementAllowedHereDesc)(parent, elt);
#define[htmlRequiredAttrs](#htmlRequiredAttrs)(elt);
typedef[xmlParserNodeInfo](libxml2-parser.html#xmlParserNodeInfo)[htmlParserNodeInfo](#htmlParserNodeInfo);
typedef[xmlParserInput](libxml2-tree.html#xmlParserInput)[htmlParserInput](#htmlParserInput);
typedef[xmlParserCtxtPtr](libxml2-tree.html#xmlParserCtxtPtr)[htmlParserCtxtPtr](#htmlParserCtxtPtr);
typedef struct _htmlEntityDesc[htmlEntityDesc](#htmlEntityDesc);
typedef[xmlDocPtr](libxml2-tree.html#xmlDocPtr)[htmlDocPtr](#htmlDocPtr);
typedef[xmlSAXHandlerPtr](libxml2-tree.html#xmlSAXHandlerPtr)[htmlSAXHandlerPtr](#htmlSAXHandlerPtr);
typedef enum[htmlStatus](#htmlStatus);
typedef[xmlNodePtr](libxml2-tree.html#xmlNodePtr)[htmlNodePtr](#htmlNodePtr);
typedef[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)*[htmlElemDescPtr](#htmlElemDescPtr);
typedef struct _htmlElemDesc[htmlElemDesc](#htmlElemDesc);
typedef[xmlSAXHandler](libxml2-tree.html#xmlSAXHandler)[htmlSAXHandler](#htmlSAXHandler);
typedef[xmlParserInputPtr](libxml2-tree.html#xmlParserInputPtr)[htmlParserInputPtr](#htmlParserInputPtr);
typedef enum[htmlParserOption](#htmlParserOption);
typedef[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlEntityDescPtr](#htmlEntityDescPtr);
typedef[xmlParserCtxt](libxml2-tree.html#xmlParserCtxt)[htmlParserCtxt](#htmlParserCtxt);
int[htmlIsScriptAttribute](#htmlIsScriptAttribute)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* name);
int[htmlHandleOmittedElem](#htmlHandleOmittedElem)(int val);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadFd](#htmlReadFd)(int fd,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadIO](#htmlReadIO)([xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlParseFile](#htmlParseFile)(const char * filename,
const char * encoding);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadDoc](#htmlCtxtReadDoc)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options);
int[htmlAutoCloseTag](#htmlAutoCloseTag)([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* name,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem);
int[htmlParseChunk](#htmlParseChunk)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * chunk,
int size,
int terminate);
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)*[htmlTagLookup](#htmlTagLookup)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* tag);[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)[htmlCreateMemoryParserCtxt](#htmlCreateMemoryParserCtxt)(const char * buffer,
int size);
void[htmlCtxtReset](#htmlCtxtReset)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);
int[htmlElementAllowedHere](#htmlElementAllowedHere)(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* elt);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadIO](#htmlCtxtReadIO)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
[xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options);[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)[htmlCreatePushParserCtxt](#htmlCreatePushParserCtxt)([htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
[xmlCharEncoding](libxml2-encoding.html#xmlCharEncoding)enc);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadMemory](#htmlReadMemory)(const char * buffer,
int size,
const char * URL,
const char * encoding,
int options);
int[htmlIsAutoClosed](#htmlIsAutoClosed)([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem);
int[htmlParseCharRef](#htmlParseCharRef)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadDoc](#htmlReadDoc)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options);
int[htmlEncodeEntities](#htmlEncodeEntities)(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar);[htmlStatus](libxml2-HTMLparser.html#htmlStatus)[htmlNodeStatus](#htmlNodeStatus)(const[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)node,
int legacy);[htmlStatus](libxml2-HTMLparser.html#htmlStatus)[htmlAttrAllowed](#htmlAttrAllowed)(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* attr,
int legacy);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlSAXParseFile](#htmlSAXParseFile)(const char * filename,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData);
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlParseEntityRef](#htmlParseEntityRef)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)** str);[htmlStatus](libxml2-HTMLparser.html#htmlStatus)[htmlElementStatusHere](#htmlElementStatusHere)(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt);
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlEntityValueLookup](#htmlEntityValueLookup)(unsigned int value);
void[htmlParseElement](#htmlParseElement)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);
int[UTF8ToHtml](#UTF8ToHtml)(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen);
const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*[htmlEntityLookup](#htmlEntityLookup)(const[xmlChar](libxml2-xmlstring.html#xmlChar)* name);
void[htmlFreeParserCtxt](#htmlFreeParserCtxt)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadMemory](#htmlCtxtReadMemory)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadFd](#htmlCtxtReadFd)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int fd,
const char * URL,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlReadFile](#htmlReadFile)(const char * filename,
const char * encoding,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlCtxtReadFile](#htmlCtxtReadFile)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * filename,
const char * encoding,
int options);
int[htmlParseDocument](#htmlParseDocument)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt);[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)[htmlNewParserCtxt](#htmlNewParserCtxt)(void);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlSAXParseDoc](#htmlSAXParseDoc)([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData);
int[htmlCtxtUseOptions](#htmlCtxtUseOptions)([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int options);[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)[htmlParseDoc](#htmlParseDoc)([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding);

Description

Details

Macro htmlDefaultSubelement

#define[htmlDefaultSubelement](#htmlDefaultSubelement)(elt);

Returns the default subelement for this element

| elt: | HTML element |


Macro htmlElementAllowedHereDesc

#define[htmlElementAllowedHereDesc](#htmlElementAllowedHereDesc)(parent, elt);

Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.

| parent: | HTML parent element | | elt: | HTML element |


Macro htmlRequiredAttrs

#define[htmlRequiredAttrs](#htmlRequiredAttrs)(elt);

Returns the attributes required for the specified element.

| elt: | HTML element |


Typedef htmlDocPtr

[xmlDocPtr](libxml2-tree.html#xmlDocPtr)htmlDocPtr;

Structure htmlElemDesc

struct _htmlElemDesc {
    const char *	name	: The tag name
    char	startTag	: Whether the start tag can be implied
    char	endTag	: Whether the end tag can be implied
    char	saveEndTag	: Whether the end tag should be saved
    char	empty	: Is this an empty element ?
    char	depr	: Is this a deprecated element ?
    char	dtd	: 1: only in Loose DTD, 2: only Frameset one
    char	isinline	: is this a block 0 or inline 1 element
    const char *	desc	: the description NRK Jan.2003 * New fields encapsulating HTML structure
    const char **	subelts	: allowed sub-elements of this element
    const char *	defaultsubelt	: subelement for suggested auto-repair if necessary or NULL
    const char **	attrs_opt	: Optional Attributes
    const char **	attrs_depr	: Additional deprecated attributes
    const char **	attrs_req	: Required attributes
} htmlElemDesc;

Typedef htmlElemDescPtr

[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* htmlElemDescPtr;

Structure htmlEntityDesc

struct _htmlEntityDesc {
    unsigned int	value	: the UNICODE value for the character
    const char *	name	: The entity name
    const char *	desc	: the description
} htmlEntityDesc;

Typedef htmlEntityDescPtr

[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)* htmlEntityDescPtr;

Typedef htmlNodePtr

[xmlNodePtr](libxml2-tree.html#xmlNodePtr)htmlNodePtr;

Typedef htmlParserCtxt

[xmlParserCtxt](libxml2-tree.html#xmlParserCtxt)htmlParserCtxt;

Typedef htmlParserCtxtPtr

[xmlParserCtxtPtr](libxml2-tree.html#xmlParserCtxtPtr)htmlParserCtxtPtr;

Typedef htmlParserInput

[xmlParserInput](libxml2-tree.html#xmlParserInput)htmlParserInput;

Typedef htmlParserInputPtr

[xmlParserInputPtr](libxml2-tree.html#xmlParserInputPtr)htmlParserInputPtr;

Typedef htmlParserNodeInfo

[xmlParserNodeInfo](libxml2-parser.html#xmlParserNodeInfo)htmlParserNodeInfo;

Enum htmlParserOption

enum[htmlParserOption](#htmlParserOption){
    HTML\_PARSE\_RECOVER = 1 /* Relaxed parsing */
    HTML\_PARSE\_NODEFDTD = 4 /* do not default a doctype if not found */
    HTML\_PARSE\_NOERROR = 32 /* suppress error reports */
    HTML\_PARSE\_NOWARNING = 64 /* suppress warning reports */
    HTML\_PARSE\_PEDANTIC = 128 /* pedantic error reporting */
    HTML\_PARSE\_NOBLANKS = 256 /* remove blank nodes */
    HTML\_PARSE\_NONET = 2048 /* Forbid network access */
    HTML\_PARSE\_NOIMPLIED = 8192 /* Do not add implied html/body... elements */
    HTML\_PARSE\_COMPACT = 65536 /* compact small text nodes */
    HTML\_PARSE\_IGNORE\_ENC = 2097152 /* ignore internal document encoding hint */
};

Typedef htmlSAXHandler

[xmlSAXHandler](libxml2-tree.html#xmlSAXHandler)htmlSAXHandler;

Typedef htmlSAXHandlerPtr

[xmlSAXHandlerPtr](libxml2-tree.html#xmlSAXHandlerPtr)htmlSAXHandlerPtr;

Enum htmlStatus

enum[htmlStatus](#htmlStatus){
    HTML\_NA = 0 /* something we don't check at all */
    HTML\_INVALID = 1
    HTML\_DEPRECATED = 2
    HTML\_VALID = 4
    HTML\_REQUIRED = 12 /* VALID bit set so ( & HTML_VALID ) is TRUE */
};

UTF8ToHtml ()

int	UTF8ToHtml (unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen)

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

| out: | a pointer to an array of bytes to store the result | | outlen: | the length of @out | | in: | a pointer to an array of UTF-8 chars | | inlen: | the length of @in | | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is non-negative, else unpredictable. The value of @outlen after return is the number of octets produced. |


htmlAttrAllowed ()

[htmlStatus](libxml2-HTMLparser.html#htmlStatus)htmlAttrAllowed (const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* attr,
int legacy)

Checks whether an attribute is valid for an element. Has full knowledge of Required and Deprecated attributes.

| elt: | HTML element | | attr: | HTML attribute | | legacy: | whether to allow deprecated attributes | | Returns: | one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID |


htmlAutoCloseTag ()

int	htmlAutoCloseTag ([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* name,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem)

The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.

| doc: | the HTML document | | name: | The tag name | | elem: | the HTML element | | Returns: | 1 if autoclose, 0 otherwise |


htmlCreateMemoryParserCtxt ()

[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)htmlCreateMemoryParserCtxt	(const char * buffer,
int size)

Create a parser context for an HTML in-memory document.

| buffer: | a pointer to a char array | | size: | the size of the array | | Returns: | the new parser context or NULL |


htmlCreatePushParserCtxt ()

[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)htmlCreatePushParserCtxt	([htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
[xmlCharEncoding](libxml2-encoding.html#xmlCharEncoding)enc)

Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.

| sax: | a SAX handler | | user_data: | The user data returned on SAX callbacks | | chunk: | a pointer to an array of chars | | size: | number of chars in the array | | filename: | an optional file name or URI | | enc: | an optional encoding | | Returns: | the new parser context or NULL |


htmlCtxtReadDoc ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadDoc ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)

Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | cur: | a pointer to a zero terminated string | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlCtxtReadFd ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadFd ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int fd,
const char * URL,
const char * encoding,
int options)

Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | fd: | an open file descriptor | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlCtxtReadFile ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadFile	([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * filename,
const char * encoding,
int options)

Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | filename: | a file or URL | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlCtxtReadIO ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadIO ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
[xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)

parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context

| ctxt: | an HTML parser context | | ioread: | an I/O read function | | ioclose: | an I/O close function | | ioctx: | an I/O handler | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlCtxtReadMemory ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlCtxtReadMemory	([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)

Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

| ctxt: | an HTML parser context | | buffer: | a pointer to a char array | | size: | the size of the array | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlCtxtReset ()

void	htmlCtxtReset ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)

Reset a parser context

| ctxt: | an HTML parser context |


htmlCtxtUseOptions ()

int	htmlCtxtUseOptions ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
int options)

Applies the options to the parser context

| ctxt: | an HTML parser context | | options: | a combination of htmlParserOption(s) | | Returns: | 0 in case of success, the set of unknown or unimplemented options in case of error. |


htmlElementAllowedHere ()

int	htmlElementAllowedHere (const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[xmlChar](libxml2-xmlstring.html#xmlChar)* elt)

Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements

| parent: | HTML parent element | | elt: | HTML element | | Returns: | 1 if allowed; 0 otherwise. |


htmlElementStatusHere ()

[htmlStatus](libxml2-HTMLparser.html#htmlStatus)htmlElementStatusHere	(const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* parent,
const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)* elt)

Checks whether an HTML element may be a direct child of a parent element. and if so whether it is valid or deprecated.

| parent: | HTML parent element | | elt: | HTML element | | Returns: | one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID |


htmlEncodeEntities ()

int	htmlEncodeEntities (unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

| out: | a pointer to an array of bytes to store the result | | outlen: | the length of @out | | in: | a pointer to an array of UTF-8 chars | | inlen: | the length of @in | | quoteChar: | the quote character to escape (' or ") or zero. | | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is non-negative, else unpredictable. The value of @outlen after return is the number of octets produced. |


htmlEntityLookup ()

const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*	htmlEntityLookup	(const[xmlChar](libxml2-xmlstring.html#xmlChar)* name)

Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.

| name: | the entity name | | Returns: | the associated htmlEntityDescPtr if found, NULL otherwise. |


htmlEntityValueLookup ()

const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*	htmlEntityValueLookup	(unsigned int value)

Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.

| value: | the entity's unicode value | | Returns: | the associated htmlEntityDescPtr if found, NULL otherwise. |


htmlFreeParserCtxt ()

void	htmlFreeParserCtxt ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)

Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.

| ctxt: | an HTML parser context |


htmlHandleOmittedElem ()

int	htmlHandleOmittedElem (int val)

Set and return the previous value for handling HTML omitted tags.

| val: | int 0 or 1 | | Returns: | the previous value: 0 for no handling, 1 for auto insertion. |


htmlIsAutoClosed ()

int	htmlIsAutoClosed ([htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)doc,
[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)elem)

The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children.

| doc: | the HTML document | | elem: | the HTML element | | Returns: | 1 if autoclosed, 0 otherwise |


htmlIsScriptAttribute ()

int	htmlIsScriptAttribute (const[xmlChar](libxml2-xmlstring.html#xmlChar)* name)

Check if an attribute is of content type Script

| name: | an attribute name | | Returns: | 1 if the attribute is a script, 0 otherwise |


htmlNewParserCtxt ()

[htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)htmlNewParserCtxt	(void)

Allocate and initialize a new parser context.

| Returns: | the htmlParserCtxtPtr or NULL in case of allocation error |


htmlNodeStatus ()

[htmlStatus](libxml2-HTMLparser.html#htmlStatus)htmlNodeStatus (const[htmlNodePtr](libxml2-HTMLparser.html#htmlNodePtr)node,
int legacy)

Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)

| node: | an htmlNodePtr in a tree | | legacy: | whether to allow deprecated elements (YES is faster here for Element nodes) | | Returns: | for Element nodes, a return from htmlElementAllowedHere (if legacy allowed) or htmlElementStatusHere (otherwise). for Attribute nodes, a return from htmlAttrAllowed for other nodes, HTML_NA (no checks performed) |


htmlParseCharRef ()

int	htmlParseCharRef ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)

Parse reference declarations. [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'

| ctxt: | an HTML parser context | | Returns: | the value parsed (as an int) |


htmlParseChunk ()

int	htmlParseChunk ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const char * chunk,
int size,
int terminate)

Parse a Chunk of memory

| ctxt: | an HTML parser context | | chunk: | a char array | | size: | the size in bytes of the chunk | | terminate: | last chunk indicator | | Returns: | zero if no error, the xmlParserErrors otherwise. |


htmlParseDoc ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlParseDoc ([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding)

parse an HTML in-memory document and build a tree.

| cur: | a pointer to an array of xmlChar | | encoding: | a free form C string describing the HTML document encoding, or NULL | | Returns: | the resulting document tree |


htmlParseDocument ()

int	htmlParseDocument ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)

parse an HTML document (and build a tree if using the standard SAX interface).

| ctxt: | an HTML parser context | | Returns: | 0 on success, -1 in case of error. The parser context is augmented as a result of the parsing. |


htmlParseElement ()

void	htmlParseElement ([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt)

Parse an HTML element. This is highly recursive and is kept for compatibility with previous code versions. [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue

| ctxt: | an HTML parser context |


htmlParseEntityRef ()

const[htmlEntityDesc](libxml2-HTMLparser.html#htmlEntityDesc)*	htmlParseEntityRef	([htmlParserCtxtPtr](libxml2-HTMLparser.html#htmlParserCtxtPtr)ctxt,
const[xmlChar](libxml2-xmlstring.html#xmlChar)** str)

Parse an HTML entity reference. [68] EntityRef ::= '&' Name ';'

| ctxt: | an HTML parser context | | str: | location to store the entity name | | Returns: | the associated htmlEntityDescPtr if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller. |


htmlParseFile ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlParseFile (const char * filename,
const char * encoding)

parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.

| filename: | the filename | | encoding: | a free form C string describing the HTML document encoding, or NULL | | Returns: | the resulting document tree |


htmlReadDoc ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadDoc (const[xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * URL,
const char * encoding,
int options)

Parse an HTML in-memory document and build a tree.

| cur: | a pointer to a zero terminated string | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlReadFd ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadFd (int fd,
const char * URL,
const char * encoding,
int options)

Parse an HTML document from a file descriptor and build a tree.

| fd: | an open file descriptor | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlReadFile ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadFile (const char * filename,
const char * encoding,
int options)

Parse an HTML file from the filesystem or the network.

| filename: | a file or URL | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlReadIO ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadIO ([xmlInputReadCallback](libxml2-xmlIO.html#xmlInputReadCallback)ioread,
[xmlInputCloseCallback](libxml2-xmlIO.html#xmlInputCloseCallback)ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)

parse an HTML document from I/O functions and source and build a tree.

| ioread: | an I/O read function | | ioclose: | an I/O close function | | ioctx: | an I/O handler | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlReadMemory ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlReadMemory (const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)

Parse an HTML in-memory document and build a tree.

| buffer: | a pointer to a char array | | size: | the size of the array | | URL: | the base URL to use for the document | | encoding: | the document encoding, or NULL | | options: | a combination of htmlParserOption(s) | | Returns: | the resulting document tree |


htmlSAXParseDoc ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlSAXParseDoc ([xmlChar](libxml2-xmlstring.html#xmlChar)* cur,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)

Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.

| cur: | a pointer to an array of xmlChar | | encoding: | a free form C string describing the HTML document encoding, or NULL | | sax: | the SAX handler block | | userData: | if using SAX, this pointer will be provided on callbacks. | | Returns: | the resulting document tree unless SAX is NULL or the document is not well formed. |


htmlSAXParseFile ()

[htmlDocPtr](libxml2-HTMLparser.html#htmlDocPtr)htmlSAXParseFile	(const char * filename,
const char * encoding,
[htmlSAXHandlerPtr](libxml2-HTMLparser.html#htmlSAXHandlerPtr)sax,
void * userData)

Parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.

| filename: | the filename | | encoding: | a free form C string describing the HTML document encoding, or NULL | | sax: | the SAX handler block | | userData: | if using SAX, this pointer will be provided on callbacks. | | Returns: | the resulting document tree unless SAX is NULL or the document is not well formed. |


htmlTagLookup ()

const[htmlElemDesc](libxml2-HTMLparser.html#htmlElemDesc)*	htmlTagLookup	(const[xmlChar](libxml2-xmlstring.html#xmlChar)* tag)

Lookup the HTML tag in the ElementTable

| tag: | The tag name in lowercase | | Returns: | the related htmlElemDescPtr or NULL if not found. |