Newer
Older
/*
* HTMLtree.c : implementation of access function for an HTML tree.
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
#define IN_LIBXML
#include "libxml.h"
#ifdef LIBXML_HTML_ENABLED
#include <string.h> /* for memset() only ! */
#include <ctype.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/parserInternals.h>
#include <libxml/uri.h>
#include "private/buf.h"
#include "private/error.h"
#include "private/io.h"
#include "private/save.h"
/************************************************************************
* *
* *
************************************************************************/
/**
* htmlGetMetaEncoding:
* @doc: the document
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
* Encoding definition lookup in the Meta tags
*
* Returns the current encoding as flagged in the HTML source
*/
const xmlChar *
htmlGetMetaEncoding(htmlDocPtr doc) {
htmlNodePtr cur;
const xmlChar *content;
const xmlChar *encoding;
if (doc == NULL)
return(NULL);
cur = doc->children;
/*
* Search the html
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrEqual(cur->name, BAD_CAST"html"))
break;
if (xmlStrEqual(cur->name, BAD_CAST"head"))
goto found_head;
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
goto found_meta;
}
cur = cur->next;
}
if (cur == NULL)
return(NULL);
cur = cur->children;
/*
* Search the head
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrEqual(cur->name, BAD_CAST"head"))
break;
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
goto found_meta;
}
cur = cur->next;
}
if (cur == NULL)
return(NULL);
found_head:
cur = cur->children;
/*
* Search the meta elements
*/
found_meta:
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
xmlAttrPtr attr = cur->properties;
int http;
const xmlChar *value;
content = NULL;
http = 0;
while (attr != NULL) {
if ((attr->children != NULL) &&
(attr->children->type == XML_TEXT_NODE) &&
(attr->children->next == NULL)) {
value = attr->children->content;
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
http = 1;
else if ((value != NULL)
&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
content = value;
if ((http != 0) && (content != NULL))
goto found_content;
}
attr = attr->next;
}
}
}
cur = cur->next;
}
return(NULL);
found_content:
encoding = xmlStrstr(content, BAD_CAST"charset=");
encoding = xmlStrstr(content, BAD_CAST"Charset=");
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
if (encoding != NULL) {
encoding += 8;
} else {
encoding = xmlStrstr(content, BAD_CAST"charset =");
encoding = xmlStrstr(content, BAD_CAST"Charset =");
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
if (encoding != NULL)
encoding += 9;
}
if (encoding != NULL) {
while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
}
return(encoding);
}
/**
* htmlSetMetaEncoding:
* @doc: the document
* @encoding: the encoding string
* Sets the current encoding in the Meta tags
* NOTE: this will not change the document content encoding, just
* the META flag associated.
*
* Returns 0 in case of success and -1 in case of error
*/
int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
htmlNodePtr cur, meta = NULL, head = NULL;
const xmlChar *content = NULL;
if (doc == NULL)
return(-1);
/* html isn't a real encoding it's just libxml2 way to get entities */
if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
return(-1);
if (encoding != NULL) {
snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
newcontent[sizeof(newcontent) - 1] = 0;
}
cur = doc->children;
/*
* Search the html
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
break;
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
goto found_head;
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
goto found_meta;
}
cur = cur->next;
}
if (cur == NULL)
return(-1);
cur = cur->children;
/*
* Search the head
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
break;
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
head = cur->parent;
}
cur = cur->next;
}
if (cur == NULL)
return(-1);
found_head:
head = cur;
if (cur->children == NULL)
goto create;
cur = cur->children;
found_meta:
/*
* Search and update all the remaining the meta elements carrying
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
xmlAttrPtr attr = cur->properties;
int http;
const xmlChar *value;
content = NULL;
http = 0;
while (attr != NULL) {
if ((attr->children != NULL) &&
(attr->children->type == XML_TEXT_NODE) &&
(attr->children->next == NULL)) {
value = attr->children->content;
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
http = 1;
(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
content = value;
}
if ((http != 0) && (content != NULL))
break;
}
attr = attr->next;
}
if ((http != 0) && (content != NULL)) {
meta = cur;
}
}
}
cur = cur->next;
}
create:
if (meta == NULL) {
if ((encoding != NULL) && (head != NULL)) {
/*
* Create a new Meta element with the right attributes
*/
meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
if (head->children == NULL)
xmlAddChild(head, meta);
else
xmlAddPrevSibling(head->children, meta);
xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
}
} else {
/* remove the meta tag if NULL is passed */
if (encoding == NULL) {
xmlUnlinkNode(meta);
xmlFreeNode(meta);
}
/* change the document only if there is a real encoding change */
else if (xmlStrcasestr(content, encoding) == NULL) {
xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
}
}
return(0);
}
/**
* booleanHTMLAttrs:
*
* These are the HTML attributes which will be output
* in minimized form, i.e. <option selected="selected"> will be
* output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
*
*/
static const char* const htmlBooleanAttrs[] = {
"checked", "compact", "declare", "defer", "disabled", "ismap",
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
"selected", NULL
};
/**
* htmlIsBooleanAttr:
* @name: the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute.
* returns: false if the attribute is not boolean, true otherwise.
*/
int
htmlIsBooleanAttr(const xmlChar *name)
{
int i = 0;
while (htmlBooleanAttrs[i] != NULL) {
if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
return 1;
i++;
}
return 0;
}
#ifdef LIBXML_OUTPUT_ENABLED
/************************************************************************
* *
* *
************************************************************************/
/**
* htmlSaveErrMemory:
*
* Handle an out of memory condition
*/
static void
htmlSaveErrMemory(const char *extra)
{
__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
/**
* htmlSaveErr:
* @code: the error number
* @node: the location of the error.
*
* Handle an out of memory condition
*/
static void
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
{
const char *msg = NULL;
switch(code) {
case XML_SAVE_NOT_UTF8:
break;
case XML_SAVE_CHAR_INVALID:
break;
case XML_SAVE_UNKNOWN_ENCODING:
break;
case XML_SAVE_NO_DOCTYPE:
}
__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
}
/************************************************************************
* *
* Dumping HTML tree content to a simple buffer *
* *
************************************************************************/
/**
* htmlBufNodeDumpFormat:
* @buf: the xmlBufPtr output
* @doc: the document
* @cur: the current node
* @format: should formatting spaces been added
*
* Dump an HTML node, recursive behaviour,children are printed too.
*
* Returns the number of byte written or -1 in case of error
*/
static size_t
htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
int ret;
xmlOutputBufferPtr outbuf;
if (cur == NULL) {
return (-1);
}
if (buf == NULL) {
return (-1);
}
outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
if (outbuf == NULL) {
htmlSaveErrMemory("allocating HTML output buffer");
return (-1);
}
outbuf->buffer = buf;
outbuf->encoder = NULL;
outbuf->writecallback = NULL;
outbuf->closecallback = NULL;
outbuf->context = NULL;
outbuf->written = 0;
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
xmlFree(outbuf);
return (ret);
}
/**
* htmlNodeDump:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
*
* Dump an HTML node, recursive behaviour,children are printed too,
* and formatting returns are added.
*
* Returns the number of byte written or -1 in case of error
*/
int
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
xmlBufPtr buffer;
size_t ret;
if ((buf == NULL) || (cur == NULL))
return(-1);
buffer = xmlBufFromBuffer(buf);
if (buffer == NULL)
return(-1);
ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
xmlBufBackToBuffer(buffer);
if (ret > INT_MAX)
return(-1);
return((int) ret);
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
}
/**
* htmlNodeDumpFileFormat:
* @out: the FILE pointer
* @doc: the document
* @cur: the current node
* @encoding: the document encoding
* @format: should formatting spaces been added
*
* Dump an HTML node, recursive behaviour,children are printed too.
*
* TODO: if encoding == NULL try to save in the doc encoding
*
* returns: the number of byte written or -1 in case of failure.
*/
int
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding, int format) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
int ret;
xmlInitParser();
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFile(out, handler);
if (buf == NULL) return(0);
htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
ret = xmlOutputBufferClose(buf);
return(ret);
}
/**
* htmlNodeDumpFile:
* @out: the FILE pointer
* @doc: the document
* @cur: the current node
*
* Dump an HTML node, recursive behaviour,children are printed too,
* and formatting returns are added.
*/
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
}
/**
* @cur: the document
* @mem: OUT: the memory pointer
* @size: OUT: the memory length
* @format: should formatting spaces been added
*
* Dump an HTML document in memory and return the xmlChar * and it's size.
* It's up to the caller to free the memory.
*/
void
htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
const char *encoding;
xmlInitParser();
if ((mem == NULL) || (size == NULL))
return;
if (cur == NULL) {
*mem = NULL;
*size = 0;
return;
}
encoding = (const char *) htmlGetMetaEncoding(cur);
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
buf = xmlAllocOutputBufferInternal(handler);
if (buf == NULL) {
*mem = NULL;
*size = 0;
return;
}
htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
xmlOutputBufferFlush(buf);
if (buf->conv != NULL) {
*size = xmlBufUse(buf->conv);
*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
*size = xmlBufUse(buf->buffer);
*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
}
(void)xmlOutputBufferClose(buf);
}
/**
* htmlDocDumpMemory:
* @cur: the document
* @mem: OUT: the memory pointer
* @size: OUT: the memory length
*
* Dump an HTML document in memory and return the xmlChar * and it's size.
* It's up to the caller to free the memory.
*/
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
htmlDocDumpMemoryFormat(cur, mem, size, 1);
}
/************************************************************************
* *
* Dumping HTML tree content to an I/O output buffer *
* *
************************************************************************/
/**
* htmlDtdDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @encoding: the encoding string
* TODO: check whether encoding is needed
*
* Dump the HTML document DTD, if any.
*/
static void
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
const char *encoding ATTRIBUTE_UNUSED) {
xmlDtdPtr cur = doc->intSubset;
if (cur == NULL) {
htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
return;
}
xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->ExternalID != NULL) {
xmlOutputBufferWriteString(buf, " PUBLIC ");
xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
if (cur->SystemID != NULL) {
xmlOutputBufferWriteString(buf, " ");
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
}
} else if (cur->SystemID != NULL &&
xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
xmlOutputBufferWriteString(buf, " SYSTEM ");
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
}
xmlOutputBufferWriteString(buf, ">\n");
}
/**
* htmlAttrDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the attribute pointer
*
* Dump an HTML attribute
*/
static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
xmlChar *value;
/*
* The html output method should not escape a & character
* occurring in an attribute value immediately followed by
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
* This is implemented in xmlEncodeEntitiesReentrant
*/
if (cur == NULL) {
return;
}
xmlOutputBufferWriteString(buf, " ");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
value = xmlNodeListGetString(doc, cur->children, 0);
if (value) {
xmlOutputBufferWriteString(buf, "=");
if ((cur->ns == NULL) && (cur->parent != NULL) &&
(cur->parent->ns == NULL) &&
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
xmlChar *tmp = value;
while (IS_BLANK_CH(*tmp)) tmp++;
* Angle brackets are technically illegal in URIs, but they're
* used in server side includes, for example. Curly brackets
* are illegal as well and often used in templates.
* Don't escape non-whitespace, printable ASCII chars for
* improved interoperability. Only escape space, control
* and non-ASCII chars.
escaped = xmlURIEscapeStr(tmp,
BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}");
if (escaped != NULL) {
xmlBufWriteQuotedString(buf->buffer, escaped);
xmlFree(escaped);
} else {
xmlBufWriteQuotedString(buf->buffer, value);
xmlBufWriteQuotedString(buf->buffer, value);
}
xmlFree(value);
} else {
xmlOutputBufferWriteString(buf, "=\"\"");
}
}
}
/**
* htmlNodeDumpFormatOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @encoding: the encoding string (unused)
* @format: should formatting spaces been added
*
* Dump an HTML node, recursive behaviour,children are printed too.
*/
void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
int format) {
const htmlElemDesc * info;
xmlInitParser();
if ((cur == NULL) || (buf == NULL)) {
while (1) {
switch (cur->type) {
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_NODE:
if (((xmlDocPtr) cur)->intSubset != NULL) {
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
}
if (cur->children != NULL) {
/* Always validate cur->parent when descending. */
if (cur->parent == parent) {
parent = cur;
cur = cur->children;
continue;
}
} else {
xmlOutputBufferWriteString(buf, "\n");
/*
* Some users like lxml are known to pass nodes with a corrupted
* tree structure. Fall back to a recursive call to handle this
* case.
*/
if ((cur->parent != parent) && (cur->children != NULL)) {
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
break;
}
/*
* Get specific HTML info for that node.
*/
if (cur->ns == NULL)
info = htmlTagLookup(cur->name);
else
info = NULL;
xmlOutputBufferWriteString(buf, "<");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->nsDef)
xmlNsListDumpOutput(buf, cur->nsDef);
attr = cur->properties;
while (attr != NULL) {
htmlAttrDumpOutput(buf, doc, attr);
attr = attr->next;
}
if ((info != NULL) && (info->empty)) {
xmlOutputBufferWriteString(buf, ">");
} else if (cur->children == NULL) {
if ((info != NULL) && (info->saveEndTag != 0) &&
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
xmlOutputBufferWriteString(buf, ">");
} else {
xmlOutputBufferWriteString(buf, "></");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf,
(const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
xmlOutputBufferWriteString(buf, ">");
}
} else {
xmlOutputBufferWriteString(buf, ">");
if ((format) && (info != NULL) && (!info->isinline) &&
(cur->children->type != HTML_TEXT_NODE) &&
(cur->children->type != HTML_ENTITY_REF_NODE) &&
(cur->children != cur->last) &&
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
cur = cur->children;
continue;
}
if ((format) && (cur->next != NULL) &&
(info != NULL) && (!info->isinline)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
(parent != NULL) &&
(parent->name != NULL) &&
(parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
break;
case XML_ATTRIBUTE_NODE:
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
break;
case HTML_TEXT_NODE:
if (cur->content == NULL)
break;
if (((cur->name == (const xmlChar *)xmlStringText) ||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
((parent == NULL) ||
((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
(xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
if (buffer != NULL) {
xmlOutputBufferWriteString(buf, (const char *)buffer);
xmlFree(buffer);
}
} else {
xmlOutputBufferWriteString(buf, (const char *)cur->content);
}
break;
case HTML_COMMENT_NODE:
if (cur->content != NULL) {
xmlOutputBufferWriteString(buf, "<!--");
xmlOutputBufferWriteString(buf, (const char *)cur->content);
xmlOutputBufferWriteString(buf, "-->");
}
break;
case HTML_PI_NODE:
if (cur->name != NULL) {
xmlOutputBufferWriteString(buf, "<?");
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->content != NULL) {
xmlOutputBufferWriteString(buf, " ");
xmlOutputBufferWriteString(buf,
(const char *)cur->content);
}
xmlOutputBufferWriteString(buf, ">");
}
break;
case HTML_ENTITY_REF_NODE:
xmlOutputBufferWriteString(buf, "&");
xmlOutputBufferWriteString(buf, (const char *)cur->name);
xmlOutputBufferWriteString(buf, ";");
break;
case HTML_PRESERVE_NODE:
if (cur->content != NULL) {
xmlOutputBufferWriteString(buf, (const char *)cur->content);
}
break;
default:
break;
}
while (1) {
if (cur == root)
return;
if (cur->next != NULL) {
cur = cur->next;
break;
}
cur = parent;
/* cur->parent was validated when descending. */
parent = cur->parent;
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
(cur->type == XML_DOCUMENT_NODE)) {
xmlOutputBufferWriteString(buf, "\n");
} else {
if ((format) && (cur->ns == NULL))
info = htmlTagLookup(cur->name);
else
info = NULL;
if ((format) && (info != NULL) && (!info->isinline) &&
(cur->last->type != HTML_TEXT_NODE) &&
(cur->last->type != HTML_ENTITY_REF_NODE) &&
(cur->children != cur->last) &&
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
xmlOutputBufferWriteString(buf, "</");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
xmlOutputBufferWriteString(buf, ">");
if ((format) && (info != NULL) && (!info->isinline) &&
(cur->next != NULL)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
(parent != NULL) &&
(parent->name != NULL) &&
(parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
}
}
}
}
/**
* htmlNodeDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @encoding: the encoding string (unused)
*
* Dump an HTML node, recursive behaviour,children are printed too,
* and formatting returns/spaces are added.
*/
void
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
}
/**
* htmlDocContentDumpFormatOutput:
* @buf: the HTML buffer output
* @cur: the document
* @encoding: the encoding string (unused)
* @format: should formatting spaces been added
*
* Dump an HTML document.
*/
void
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
const char *encoding ATTRIBUTE_UNUSED,
int format) {
int type = 0;
if (cur) {
type = cur->type;
cur->type = XML_HTML_DOCUMENT_NODE;
}
htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
if (cur)
cur->type = (xmlElementType) type;
}
/**
* htmlDocContentDumpOutput: