Commit abb30ae3 authored by Murray Cumming's avatar Murray Cumming Committed by Murray Cumming

Added manual. Use GLIBMM_CHECK_PERL to get the perl path, needed to insert the example code in the manual.

2005-02-11  Murray Cumming  <murrayc@murrayc.com>

        * docs/: Added manual.
        * configure.in: Use GLIBMM_CHECK_PERL to get the perl path, needed
        to insert the example code in the manual.
        * docs/Makefile.am: Move the reference and manual into a docs folder
        so that the docs and the examples have the same relative path.
        * docs/index.html: Mention the manual and update the links.
parent 3ee110d5
2005-02-11 Murray Cumming <murrayc@murrayc.com>
* docs/: Added manual.
* configure.in: Use GLIBMM_CHECK_PERL to get the perl path, needed
to insert the example code in the manual.
* docs/Makefile.am: Move the reference and manual into a docs folder
so that the docs and the examples have the same relative path.
* docs/index.html: Mention the manual and update the links.
2005-02-11 Murray Cumming <murrayc@murrayc.com>
* libxml++/document.cc: do_write_to_string(): libxml returns the
......
......@@ -92,6 +92,8 @@ AC_CHECK_HEADERS(string list map, , exit)
PKG_CHECK_MODULES(LIBXML, libxml-2.0 >= 2.6.1 glibmm-2.4 >= 2.4.0)
GLIBMM_CHECK_PERL([5.6.0])
# Dummy conditional just to make automake-1.4 happy.
# We need an always-false condition in docs/Makefile.am.
AM_CONDITIONAL(LIBXMLCPP_FALSE,[false])
......@@ -109,6 +111,7 @@ AC_OUTPUT(
docs/Makefile
docs/reference/Makefile
docs/reference/Doxyfile
docs/manual/Makefile
examples/Makefile
examples/dom_build/Makefile
......
SUBDIRS = reference
SUBDIRS = reference manual
EXTRA_DIST = Makefile_web.am_fragment
include $(top_srcdir)/docs/Makefile_web.am_fragment
html_docs = index.html reference
html_docs = reference manual
post-html: reference/@GENERIC_MAJOR_VERSION@.@GENERIC_MINOR_VERSION@/html/index.html
rsync $(rsync_args) -r index.html $$USER@shell.sourceforge.net:$(web_path_project)
rsync $(rsync_args) -r $(html_docs) $$USER@shell.sourceforge.net:$(web_path_docs)
rsync $(rsync_args) -r ../examples $$USER@shell.sourceforge.net:$(web_path_project)
web_path_project = /home/groups/l/li/libxmlplusplus/htdocs/
web_path_docs = $(web_path_project)
rsync_args = -vz --rsh ssh --delete --delete-after --recursive --cvs-exclude --exclude="**1.0**"
web_path_docs = $(web_path_project)docs/
rsync_args = -vz --rsh ssh --recursive --cvs-exclude --exclude="**1.0**" --exclude=".cvsignore" --exclude="Makefile" --exclude="Makefile.in" --exclude=".deps" --exclude=".libs"
docdir = $(datadir)/doc/libxml++-2.6/docs
......@@ -51,14 +51,15 @@ the object code.</LI></ul>
</ul>
<h2>Documentation</h2>
<p>This short <a href="docs/manual/html/index.html">manual</a> gives an overview of the libxml++ API.</p>
<p>
You can browse online the Reference documentation generated by
<a href="http://www.stack.nl/~dimitri/doxygen/index.html">
<img src="reference/1.0/html/doxygen.png" alt="doxygen" align="middle" border=0 ></a>
</p>
<ul>
<li><a href="reference/1.0/html/hierarchy.html">1.0 API</a></li>
<li><a href="reference/2.8/html/hierarchy.html">2.8 API</a></li>
<li><a href="docs/reference/1.0/html/hierarchy.html">1.0 API</a></li>
<li><a href="docs/reference/2.8/html/hierarchy.html">2.8 API</a></li>
</ul>
<h2>Development</h2>
......
# Automake fragment for the DocBook manual: generates libxml++.xml from a
# template, renders it to chunked HTML with xsltproc, and provides targets to
# install, uninstall, upload and clean the result.
EXTRA_DIST = README $(docbook_docs) html libxml++.xml libxml++_without_code.xml
# Create a DocBook source file with the code inline, instead of the special comments.
# The script is given the examples directory (../../examples) and the template;
# its standard output is redirected into the target.
libxml++.xml: libxml++_without_code.xml insert_example_code.pl
$(PERL_PATH) insert_example_code.pl ../../examples $< >$@
docbook_docs = $(srcdir)/libxml++.xml
include $(top_srcdir)/docs/Makefile_web.am_fragment
# Remote directory that post-html uploads into (web_path_docs comes from the
# included fragment).
article_upload_path = $(web_path_docs)manual
# Stylesheet for the HTML output; "?=" lets the environment or command line
# substitute a different (e.g. locally installed) stylesheet.
DOCBOOK_STYLESHEET ?= http://docbook.sourceforge.net/release/xsl/current/html/chunk.xsl
# Rebuild the HTML manual from scratch: remove any old html directory,
# recreate it, and let xsltproc write its output files into it.
html/index.html: $(srcdir)/libxml++.xml
-rm -rf html
$(mkinstalldirs) html
xsltproc -o html/ --catalogs $(DOCBOOK_STYLESHEET) $<
# Gzipped tarball of the html directory.
libxml++-html.tar.gz: $(srcdir)/html
tar -cf - $< | gzip > $@
# Pattern rules for the other output formats, produced by the db2* tools.
%.dvi: $(srcdir)/%.xml
db2dvi $<
%.ps: $(srcdir)/%.xml
db2ps $<
%.pdf: $(srcdir)/%.xml
db2pdf $<
# Upload the dvi/ps/pdf/tarball files over ssh and fix their permissions and
# group on the server.
# NOTE(review): $(libxml++) is not defined in any makefile fragment visible
# here, so the remote "cd" appears to get an empty argument -- confirm the
# intended upload directory.
post-lumps: $(srcdir)/libxml++.dvi $(srcdir)/libxml++.ps $(srcdir)/libxml++.pdf $(srcdir)/libxml++-html.tar.gz
tar -cf - $^ | ssh $$SSH_OPT $$USER@libxmlplusplus.sourceforge.net "cd $(libxml++) ; tar -xvf - ; chmod a+r,g+w * ; chgrp libxmlplusplus *"
# Upload only the generated *.html files: they are streamed as a gzipped tar
# over ssh and unpacked in $(article_upload_path)/html on the server.
post-html: $(srcdir)/html/index.html
(cd html && tar -cf - *.html | gzip -3 | \
ssh $$SSH_OPT $$USER@libxmlplusplus.sourceforge.net "cd $(article_upload_path)/html ; gunzip | tar -xvf - ; chmod a+r,g+w * ; chgrp libxmlplusplus *")
# Installation directory for the HTML manual.
manualdir = $(docdir)/manual/html
# Install every *.html file found in the directory of the first prerequisite
# ($(<D)) into $(DESTDIR)$(manualdir); the sed expression strips the directory
# part so only the file name is used at the destination.
install-manual: $(srcdir)/html/index.html
@$(NORMAL_INSTALL)
$(mkinstalldirs) $(DESTDIR)$(manualdir)
@dir='$(<D)'; for p in $$dir/*.html ; do \
f="`echo $$p | sed -e 's|^.*/||'`"; \
echo " $(INSTALL_DATA) $$p $(DESTDIR)$(manualdir)/$$f"; \
$(INSTALL_DATA) $$p $(DESTDIR)$(manualdir)/$$f; \
done
# Mirror of install-manual: remove from $(DESTDIR)$(manualdir) each file name
# that exists as *.html in the built html directory.
uninstall-manual: $(srcdir)/html/index.html
@$(NORMAL_UNINSTALL)
@dir='$(<D)'; for p in $$dir/*.html ; do \
f="`echo $$p | sed -e 's|^.*/||'`"; \
echo " rm -f $(DESTDIR)$(manualdir)/$$f"; \
rm -f $(DESTDIR)$(manualdir)/$$f; \
done
# Hook the manual into automake's standard install/uninstall/all/clean targets.
install-data-local: install-manual
uninstall-local: uninstall-manual
all-local: $(srcdir)/html/index.html
# The leading "-" makes make ignore failures of the rm commands.
clean-local:
-rm -rf $(srcdir)/html
-rm -f $(srcdir)/libxml++.dvi $(srcdir)/libxml++.ps $(srcdir)/libxml++.pdf $(srcdir)/libxml++-html.tar.gz
#! /usr/bin/perl -w
# Main program.
#
# Usage: insert_example_code.pl EXAMPLES_BASE DOCBOOK_FILE...
#
# Copies every DOCBOOK_FILE to standard output unchanged.  After each line of
# the form
#   <para><ulink url="&url_examples_base;NAME">Source Code</ulink></para>
# it additionally prints the escaped contents of all *.h and then all *.cc
# files found in EXAMPLES_BASE/NAME, each wrapped in <para><programlisting>.
#
# Dies if EXAMPLES_BASE is missing or an input file cannot be opened; an
# unreadable example directory only produces a warning and an empty insert.
#sub main()
{
    use strict;

    my $examples_base = shift(@ARGV);
    die "Usage: $0 examples_base_directory docbook_file...\n"
        unless defined $examples_base;
    $examples_base .= "/" unless($examples_base =~ /\/$/);

    foreach my $file (@ARGV)
    {
        open(my $docbook_fh, '<', $file)
            or die "$0: cannot open '$file': $!\n";

        # A named lexical is used instead of $_ so that helpers called from
        # inside the loop cannot clobber the current line.
        while(my $line = <$docbook_fh>)
        {
            print $line;

            #Beginning of comment:
            # Look for
            # <para><ulink url="&url_examples_base;helloworld">Source Code</ulink></para>
            if($line =~ /<para><ulink url=\"&url_examples_base;([\/\w]+)\">Source Code<\/ulink><\/para>/)
            {
                #List all the source files in that directory:
                my $directory = $examples_base . $1;

                my @dir_contents = ();
                if(opendir(my $dir_handle, $directory))
                {
                    # readdir() order depends on the filesystem; sorting keeps
                    # the generated manual identical from build to build.
                    @dir_contents = sort(readdir($dir_handle));
                    closedir($dir_handle);
                }
                else
                {
                    warn "$0: cannot open example directory '$directory': $!\n";
                }

                my @source_files = grep(/\.cc$/, @dir_contents);
                my @header_files = grep(/\.h$/, @dir_contents);

                print "<!-- start inserted example code -->\n";
                foreach my $source_file (@header_files, @source_files)
                {
                    print "<para>File: ${source_file}\n";
                    print "<programlisting>\n";

                    # The "&" call form is kept on purpose: the sub is defined
                    # further down with a prototype, and a plain call here
                    # would warn "called too early to check prototype" under -w.
                    &process_source_file("${directory}/${source_file}");

                    print "</programlisting>\n";
                    print "</para>\n";
                }
                print "<!-- end inserted example code -->\n";
            }
        }

        close($docbook_fh);
    }

    exit 0;
}
# Print one example source file to the currently selected output handle as
# XML-safe text.
#
# Parameter: path of the source file to read.
#
# Leading lines are skipped until the first line that starts with '#' or a
# word character (i.e. the first preprocessor or code line), which drops an
# initial comment header.  From there on every line is printed with
# &, <, > and " replaced by their XML entities.
#
# Dies if the file cannot be opened, so that a missing example is noticed
# instead of producing an empty listing.  The caller's $_ is left untouched.
sub process_source_file($)
{
    use strict;

    my ($source_file) = @_;
    my $found_start = 0;

    open(my $source_fh, '<', $source_file)
        or die "$0: cannot open example source '$source_file': $!\n";

    while(my $line = <$source_fh>)
    {
        # Skip all text until the first code line.
        if(!$found_start)
        {
            next unless $line =~ /^[#\w]/;
            $found_start = 1;
        }

        # "&" must be escaped first, otherwise the entities produced by the
        # following substitutions would be escaped a second time.
        $line =~ s/&/&amp;/g;
        $line =~ s/</&lt;/g;
        $line =~ s/>/&gt;/g;
        $line =~ s/"/&quot;/g;

        print $line;
    }

    close($source_fh);
}
<?xml version="1.0"?>
<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
<!ENTITY date "February 2002">
<!ENTITY path_examples_base "../../examples/">
<!ENTITY url_examples_base "../&path_examples_base;">
]>
<article id="libxmlplusplus" lang="en">
<artheader>
<title>libxml++ - An XML Parser for C++</title>
<author>
<firstname>Murray</firstname>
<surname>Cumming</surname>
<affiliation>
<address><email>murrayc@murrayc.com</email></address>
</affiliation>
</author>
<date>12th September 2004</date>
<abstract>
<para>This is an introduction to libxml's C++ binding, with simple examples.</para>
</abstract>
</artheader>
<sect1 id="section-introduction">
<title>libxml++</title>
<para>
libxml++ is a C++ API for the popular libxml XML parser, written in C. libxml is famous for its high performance and compliance with standard specifications, but its C API is quite difficult even for common tasks.
</para>
<para>
libxml++ presents a simple C++-like API that can achieve common tasks with less code. Unlike some other C++ parsers, it does not try to avoid the advantages of standard C++ features such as namespaces, STL containers or runtime type identification, and it does not try to conform to standard API specifications meant for Java. Therefore libxml++ requires a fairly modern C++ compiler such as g++ 3.
</para>
<para>But libxml++ was created mainly to fill the need for an API-stable and ABI-stable C++ XML parser which could be used as a shared library dependency by C++ applications that are distributed widely in binary form. That means that installed applications will not break when new versions of libxml++ are installed on a user's computer. Gradual improvement of the libxml++ API is still possible via non-breaking API additions, and new independent versions of the ABI that can be installed in parallel with older versions. These are the general techniques and principles followed by the <ulink
url="http://www.gnome.org">GNOME</ulink> project, of which libxml++ is a part.</para>
<sect2>
<title>Installation</title>
<para>libxml++ is packaged by major Linux and *BSD distributions and can be installed from source on Linux and Windows, using any modern compiler, such as g++, SUN Forte, or MSVC++.</para>
<para>For instance, to install libxml++ and its documentation on Debian, use apt-get or synaptic like so:
<programlisting>
# apt-get install libxml++2.6-dev libxml++2.6-doc
</programlisting>
</para>
<para>The source code may be downloaded from <ulink
url="http://libxmlplusplus.sourceforge.net">libxmlplusplus.sourceforge.net</ulink>.
libxml++ is licensed under the LGPL, which allows its use via dynamic linking in both open source and closed-source software. The underlying libxml library uses the even more generous MIT licence.</para>
</sect2>
<sect2>
<title>UTF-8 and Glib::ustring</title>
<para>The libxml++ API takes, and gives, strings in the UTF-8 Unicode encoding, which can support all known languages and locales. This choice was made because, of the encodings that have this capability, UTF-8 is the most commonly accepted choice. UTF-8 is a multi-byte encoding, meaning that some characters use more than 1 byte. But for compatibility, old-fashioned 7-bit ASCII strings are unchanged when encoded as UTF-8, and UTF-8 strings do not contain null bytes which would cause old code to misjudge the number of bytes. For these reasons, you can store a UTF-8 string in a std::string object. However, the std::string API will operate on that string in terms of bytes, instead of characters.</para>
<para>Because Standard C++ has no string class that can fully handle UTF-8, libxml++ uses the Glib::ustring class from the glibmm library. Glib::ustring has almost exactly the same API as std::string, but methods such as length() and operator[] deal with whole UTF-8 characters rather than raw bytes.</para>
<para>There are implicit conversions between std::string and Glib::ustring, so you can use std::string wherever you see a Glib::ustring in the API, if you really don't care about any locale other than English. However, that is unlikely in today's connected world.</para>
<para>glibmm also provides useful API to convert between encodings and locales.</para>
</sect2>
</sect1>
<sect1 id="parsers">
<title>Parsers</title>
<para>Like the underlying libxml library, libxml++ allows the use of 3 parsers, depending on your needs - the DOM, SAX, and TextReader parsers. The relative advantages and behaviour of these parsers will be explained here.</para>
<para>All of the parsers may parse XML documents directly from disk, a string, or a C++ std::istream. Although the libxml++ API uses only Glib::ustring, and therefore the UTF-8 encoding, libxml++ can parse documents in any encoding, converting to UTF-8 automatically. This conversion will not lose any information because UTF-8 can represent any locale.</para>
<para>Remember that white space is usually significant in XML documents, so the parsers might provide unexpected text nodes that contain only spaces and new lines. The parser does not know whether you care about these text nodes, but your application may choose to ignore them.</para>
<sect2>
<title>DOM Parser</title>
<para>The DOM parser parses the whole document at once and stores the structure in memory, available via <literal>Parser::get_document()</literal>. With methods such as <literal>Document::get_root_node()</literal> and <literal>Node::get_children()</literal>, you may then navigate into the hierarchy of XML nodes without restriction, jumping forwards or backwards in the document based on the information that you encounter. Therefore the DOM parser uses a relatively large amount of memory.</para>
<para>You should use C++ RTTI (via <literal>dynamic_cast&lt;&gt;</literal>) to identify the specific node type and to perform actions which are not possible with all node types. For instance, only <literal>Element</literal>s have attributes. Here is the inheritance hierarchy of node types:</para>
<para>
<itemizedlist>
<listitem>xmlpp::Node:
<itemizedlist>
<listitem>xmlpp::Attribute</listitem>
<listitem>xmlpp::ContentNode
<itemizedlist>
<listitem>xmlpp::CdataNode</listitem>
<listitem>xmlpp::CommentNode</listitem>
<listitem>xmlpp::ProcessingInstructionNode</listitem>
<listitem>xmlpp::TextNode</listitem>
</itemizedlist>
</listitem>
<listitem>xmlpp::Element</listitem>
<listitem>xmlpp::EntityReference</listitem>
</itemizedlist>
</listitem>
</itemizedlist>
</para>
<para>Although you may obtain pointers to the <literal>Node</literal>s, these <literal>Node</literal>s are always owned by their parent Nodes. In most cases that means that the Node will exist, and your pointer will be valid, as long as the <literal>Document</literal> instance exists.</para>
<para>There are also several methods which can create new child <literal>Node</literal>s. By using these, and one of the <literal>Document::write_*()</literal> methods, you can use libxml++ to build a new XML document.</para>
<sect3>
<title>Example</title>
<para>This example looks in the document for expected elements and then examines them.</para>
<para><ulink url="&url_examples_base;dom_parser">Source Code</ulink></para>
</sect3>
</sect2>
<sect2>
<title>SAX Parser</title>
<para>The SAX parser presents each node of the XML document in sequence. So when you process one node, you must have already stored information about any relevant previous nodes, and you have no information at that time about subsequent nodes. The SAX parser uses less memory than the DOM parser and it is a suitable abstraction for documents that can be processed sequentially rather than as a whole.</para>
<para>By using the <literal>parse_chunk()</literal> method instead of <literal>parse()</literal>, you can even parse parts of the XML document before you have received the whole document.</para>
<para>As shown in the example, you should derive your own class from SaxParser and override some of the virtual methods. These &quot;handler&quot; methods will be called while the document is parsed.</para>
<sect3>
<title>Example</title>
<para>This example shows how the handler methods are called during parsing.</para>
<para><ulink url="&url_examples_base;sax_parser">Source Code</ulink></para>
</sect3>
</sect2>
<sect2>
<title>TextReader Parser</title>
<para>Like the SAX parser, the TextReader parser is suitable for sequential parsing, but instead of implementing handlers for specific parts of the document, it allows you to detect the current node type, process the node accordingly, and skip forward in the document as much as necessary. Unlike the DOM parser, you may not move backwards in the XML document. And unlike the SAX parser, you need not waste time processing nodes that do not interest you.</para>
<para>All methods are on the single parser instance, but their result depends on the current context. For instance, use <literal>read()</literal> to move to the next node, and <literal>move_to_element()</literal> to navigate to child nodes. These methods will return false when no more nodes are available. Then use methods such as <literal>get_name()</literal> and <literal>get_value()</literal> to examine the elements and their attributes.</para>
<sect3>
<title>Example</title>
<para>This example examines each node in turn, then moves to the next node.</para>
<para><ulink url="&url_examples_base;textreader">Source Code</ulink></para>
</sect3>
</sect2>
</sect1>
</article>
......@@ -33,8 +33,6 @@ doc-clean:
-rm -f $libxml++-reference-html.tar.gz
docdir = $(datadir)/doc/libxml++-2.6/docs
referencedir = $(docdir)/reference/@GENERIC_MAJOR_VERSION@.@GENERIC_MINOR_VERSION@/html
install-reference: $(index_html)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment