Commit 6a61ec1a authored by Eugene V. Lyubimkin's avatar Eugene V. Lyubimkin

Imported Upstream version 1.3.2a

parent 12cc3e30
This diff is collapsed.
This diff is collapsed.
/***************************************************************************/
/*
* Portions Copyright (c) 1999 GMRS Software GmbH
* Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
* All rights reserved.
*
* Author: Arno Unkrig <arno@unkrig.de>
*/
/* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License in the file COPYING for more details.
*/
/***************************************************************************/
/*
* Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
* Dates and reasons of modifications:
* Thu Oct 4 21:21:10 CEST 2001: ported to g++ 3.0
* Wed Jul 2 21:59:41 CEST 2003: ported to g++ 3.3
*/
/***************************************************************************/
#ifndef __Area_h_INCLUDED__ /* { */
#define __Area_h_INCLUDED__
/* ------------------------------------------------------------------------- */
#include <sys/types.h>
#include <string>
#include <istream>
#ifdef BOOL_DEFINITION
BOOL_DEFINITION
#undef BOOL_DEFINITION
#endif
using std::string;
using std::ostream;
/* ------------------------------------------------------------------------- */
/*
 * A single character together with its attribute flags.
 */
struct Cell {
  char character;
  char attribute; // Holds NONE or a combination of the flags below.

  // Attribute flags; the non-zero values are distinct bits.
  enum {
    NONE          = 0,
    UNDERLINE     = 1,
    BOLD          = 2,
    STRIKETHROUGH = 4
  };

  // Reset this cell to a blank character with no attributes.
  void clear()
  {
    attribute = NONE;
    character = ' ';
  }
};
/* ------------------------------------------------------------------------- */
/*
 * A one-dimensional run of Cells with an explicit length.
 *
 * Only the trivial accessors are defined inline; the constructors, the
 * destructor, resize(), insert(), append() and add_attribute() are defined
 * out of line. Area is declared a friend of this class.
 */
class Line {

public:
  typedef size_t size_type;

  Line(size_type l = 0);
  Line(const char *);
  Line(const string &);
  ~Line();

  // Number of cells currently held.
  size_type length() const
  { return length_; }

  // True when the line holds no cells.
  bool empty() const
  { return 0 == length_; }

  // Unchecked access to the cell at position x.
  const Cell &operator[](size_type x) const
  { return cells_[x]; }
  Cell &operator[](size_type x)
  { return cells_[x]; }

  // Read-only pointer to the cell storage.
  const Cell *cells() const
  { return cells_; }

  void resize(size_type l);

  // Calls resize(l) only when l exceeds the current length.
  void enlarge(size_type l)
  {
    if (length_ < l) resize(l);
  }

  // insert() overloads; each takes a position x.
  void insert(const Line &, size_type x);
  void insert(const char *, size_type x);
  void insert(const string &, size_type x);

  void append(char c);
  void append(const Line &l);
  void append(const char *p);

  // Each operator+= forwards to the matching append() overload.
  const Line &operator+=(char c)
  {
    append(c);
    return *this;
  }
  const Line &operator+=(const Line &l)
  {
    append(l);
    return *this;
  }
  const Line &operator+=(const char *p)
  {
    append(p);
    return *this;
  }

  void add_attribute(char addition);

private:
  // Copy construction and assignment are private, so outside code cannot
  // copy a Line.
  Line(const Line &);
  const Line &operator=(const Line &);

  size_type length_;
  Cell *cells_;

  friend class Area;
};
/* ------------------------------------------------------------------------- */
/*
 * A two-dimensional block of Cells; area[y] yields a pointer to row y, so a
 * single cell is reached as area[y][x].
 *
 * Only the trivial members are defined inline; everything else is defined
 * out of line.
 */
class Area {

public:
  typedef size_t size_type;

  // Alignment selectors; presumably the values expected by the "halign" and
  // "valign" arguments of insert() -- TODO confirm against the implementation.
  enum {
    LEFT,
    CENTER,
    RIGHT,
    TOP,
    MIDDLE,
    BOTTOM
  };

  Area();
  Area(size_type w, size_type h = 0, char = ' ', char = Cell::NONE);
  Area(const char *);
  Area(const string &);
  Area(const Line &);
  ~Area();

  size_type width() const
  { return width_; }
  size_type height() const
  { return height_; }

  // Unchecked row access.
  const Cell *operator[](size_type y) const
  { return cells_[y]; }
  Cell *operator[](size_type y)
  { return cells_[y]; }

  const Area &operator>>=(size_type rs);

  void resize(size_type w, size_type h);
  void enlarge(size_type w, size_type h);

  // Forwards the line's cell array and length to the (Cell *, count) overload.
  void insert(const Line &l, size_type x, size_type y)
  {
    insert(l.cells_, l.length_, x, y);
  }
  void insert(const Area &, size_type x, size_type y);
  void insert(
    const Area &,
    size_type x,
    size_type y,
    size_type w,
    size_type h,
    int       halign,
    int       valign
  );
  void insert(const Cell &, size_type x, size_type y);
  void insert(const Cell *, size_type count, size_type x, size_type y);
  void insert(char, size_type x, size_type y);
  void insert(const string &, size_type x, size_type y);

  // Prepend blank lines at top.
  void prepend(int n);

  // Append blank lines at bottom: enlarges the height by n, width unchanged.
  void append(int n)
  {
    enlarge(width_, height_ + n);
  }

  // Append at bottom!
  const Area &operator+=(const Area &);
  const Area &operator+=(int n)
  {
    append(n);
    return *this;
  }

  void fill(const Cell &, size_type x, size_type y, size_type w, size_type h);
  void fill(char, size_type x, size_type y, size_type w, size_type h);

  // ...but not to left and right free areas.
  void add_attribute(char addition);
  void add_attribute(
    char      addition,
    size_type x,
    size_type y,
    size_type w,
    size_type h
  );

  // "true" by default.
  static bool use_backspaces;

private:
  // Copy construction and assignment are private, so outside code cannot
  // copy an Area.
  Area(const Area &);
  const Area &operator=(const Area &);

  size_type width_;
  size_type height_;
  Cell **cells_;

  friend ostream &operator<<(ostream &, const Area &);
};
/* ------------------------------------------------------------------------- */
#endif /* } */
/* ------------------------------------------------------------------------- */
This diff is collapsed.
This diff is collapsed.
## CREDITS - Thanks to... Sun Nov 23 12:12:18 CET 2003
## ===========================================================================
#
# Since September 2000, these people have contributed to the development of
# this program:
Johannes Geiger <geiger@informatik.tu-muenchen.de>
+ SCRIPT/STYLE error patch
+ new IMG handling
Randolph Chung <tausq@debian.org>
+ ported to g++-3.0
Arno Unkrig <arno@unkrig.de>
+ almost all bugfixes in 1.3.1. Thanks!
Bela Lubkin <belal@caldera.com>
+ Plain-ASCII output patch
Kirby Zhou <kirbyzhou@263.net>
+ patch for SCRIPT/STYLE elements within table cells
Nicolas Boullis <nboullis@debian.org>
+ ported to g++-3.3 (this change is not backward-compatible)
Alexander Solovey <alsol@sumdu.edu.ua>
+ bugfix for urlistream.h
+ better rendering of XHTML
## ---------------------------------------------------------------------------
Martin Bayer <mbayer@zedat.fu-berlin.de>
This diff is collapsed.
/***************************************************************************/
/*
* Portions Copyright (c) 1999 GMRS Software GmbH
* Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
* All rights reserved.
*
* Author: Arno Unkrig <arno@unkrig.de>
*/
/* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License in the file COPYING for more details.
*/
/***************************************************************************/
/*
* Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
* Dates and reasons of modifications:
* Thu Oct 4 21:25:07 CEST 2001: ported to g++ 3.0
* Wed Jul 2 22:01:12 CEST 2003: ported to g++ 3.3
*/
/***************************************************************************/
#ifndef __HTMLControl_h_INCLUDED__ /* { */
#define __HTMLControl_h_INCLUDED__
/* ------------------------------------------------------------------------- */
#include "HTMLParser.h"
#include "urlistream.h"
#include <istream>
using std::istream;
/* ------------------------------------------------------------------------- */
/*
 * Derives from HTMLParser and declares the yylex() and read_cdata() methods
 * that implement its virtual interface. Holds a reference to the urlistream
 * passed to the constructor.
 */
class HTMLControl : public HTMLParser {

public:
  /*
   * is_            - input stream; stored by reference, not copied.
   * debug_scanner_ - stored in "debug_scanner".
   * debug_parser_  - assigned to "yydebug" (not declared in this class;
   *                  presumably inherited from HTMLParser).
   */
  HTMLControl(urlistream &is_, bool debug_scanner_, bool debug_parser_)
    : HTMLParser()
    , current_line(1)
    , current_column(0)
    , literal_mode(false)
    , next_token(EOF)
    , debug_scanner(debug_scanner_)
    , is(is_)
    , number_of_ungotten_chars(0)
  {
    yydebug = debug_parser_;
  }

  // Position counters; they start at line 1, column 0.
  int current_line;
  int current_column;

private:
  /*
   * Implementations of the virtual methods of "HTMLParser".
   */
  /*virtual*/ int yylex(yy_HTMLParser_stype *value_return);
  /*virtual*/ bool read_cdata(const char *terminal, string *value_return);

  /*
   * Helpers.
   */
  int yylex2(yy_HTMLParser_stype *value_return, int *tag_type_return);

  bool literal_mode;

  // "next_token" starts out as EOF; the two fields after it are not
  // initialized by the constructor.
  int                 next_token;
  yy_HTMLParser_stype next_token_value;
  int                 next_token_tag_type;

  int  get_char();
  void unget_char(int);

  bool        debug_scanner;
  urlistream &is;

  // Storage for up to five ungotten characters, plus the current count.
  int ungotten_chars[5];
  int number_of_ungotten_chars;
};
/* ------------------------------------------------------------------------- */
#endif /* } */
/* ------------------------------------------------------------------------- */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
## INSTALL - How to compile and install Sat Aug 31 12:04:42 CEST 2002
## ===========================================================================
#
# For the impatient:
'cd' into the html2text distribution directory
./configure (in some shells 'sh ./configure')
make (or in some cases 'gmake')
'mv' the html2text executable to its installation directory (e.g. /usr/local/bin)
## ---------------------------------------------------------------------------
#
# html2text can be compiled on many UNIX-like platforms, in particular those
# that provide a GNU compiler (g++).
1. Untar the distribution package:
$ gunzip html2text-1.3.X.tar.gz
$ tar xf html2text-1.3.X.tar
2. Change into the html2text distribution directory:
$ cd html2text-1.3.X
3. To generate the make files, issue:
$ ./configure
If you're using a csh on an old version of System V, you might need to type
'sh ./configure' to prevent the csh from trying to execute 'configure'
itself.
While the script is running, it prints some messages telling which features
it is checking for, like the following (the exact output depends on your platform):
| Checking C++ compiler... use "g++"
| Checking <sys/poll.h>... OK
| Checking for socket libraries... no extra libraries required
| Checking "bool"... built-in
| Checking "explicit"... built-in
| Checking Standard C++ library... works; no need to make "./libstd"
| Checking "auto_ptr"... not defined or not working, use
| "./libstd/include/auto_ptr.h"
| Checking "makedepend" includes... use "-I/usr/include -I/usr/include/bits
| -I/usr/include/g++ -I/usr/include/gnu -I/usr/include/sys
| -I/usr/lib/gcc-lib/i486-suse-linux/2.95.3/include "
| Creating "./Makefile" from "./Makefile.in"... done
|
| Preparing completed. You may now run "make" (or "gmake").
4. Then compile html2text with:
$ make
On non-GNU systems and/or if 'make' fails, you might need to use
'gmake' instead of 'make', in order to force a compilation by g++.
Compiling 'html2text' takes a while. While 'make' is running, it will print
some messages (and, hopefully, no errors), like:
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g html2text.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g html.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g HTMLControl.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g HTMLParser.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g Area.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g format.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g sgml.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g table.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g urlistream.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g Properties.C
| g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g cmp_nocase.C
| g++ -O2 -g html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o
| sgml.o table.o urlistream.o Properties.o cmp_nocase.o -o html2text
|
| Compilation completed. You may now move "html2text", "html2text.1.gz"
| and "html2textrc.5.gz" to their installation directories (e.g.
| "/usr/local/bin", "/usr/local/man/man1" and "/usr/local/man/man5").
5. No automatic installation is provided by now.
Be root. Then move the html2text executable and the manual pages to their
respective installation directories (e.g. /usr/local/bin, /usr/local/man/man1
and /usr/local/man/man5). Be sure you set the execution rights accordingly
and the files are owned by root or whoever owns binaries on your system. E.g.:
# install -s -m 755 html2text /usr/local/bin
# install -m 644 html2text.1.gz /usr/local/man/man1
# install -m 644 html2textrc.5.gz /usr/local/man/man5
After this, create a directory for the documentation files at an appropriate
place (e.g. /usr/doc/html2text or /usr/share/doc/html2text), and move all
documentation files into that folder. E.g.:
# install -d -m 755 /usr/share/doc/html2text
# install -b -p -m 644 *[ABD-Z] /usr/share/doc/html2text
Be sure you read the README file!
## ---------------------------------------------------------------------------
#
# If you prefer to install the program as an RPM package:
Place the original source tarball in rpm's build directory, e.g.
/usr/src/packages/SOURCES
Then, download the spec file from the program's homepage and start the build
with the command
rpm -bb html2text.spec
After successful compilation, rpm will print a message like
| Wrote: /usr/src/packages/RPMS/i386/html2text-1.3.1-1.i386.rpm
This package can now be installed with rpm, e.g.
rpm -Uhv /usr/src/packages/RPMS/i386/html2text-1.3.1-1.i386.rpm
## ===========================================================================
#
# Hints for porting html2text to other platforms and for solving other problems:
On g++ version 3, the 'istream.h' header file from the compiler's 'backward'
directory is used. You might need to point to that directory in the Makefile's
line beginning with 'LIBSTDCXX_INCLUDES', e.g.
LIBSTDCXX_INCLUDES = -I/usr/local/include/g++-v3/backward
Some compilers have weird built-in rules which collide with the rules in the
make files. If 'make' fails, then try to disable the built-in rules. For most
'make' utilities, this can be achieved by specifying the '-r' command line
option, i.e. 'make -r'.
From version 1.02, html2text requires the "socket()" function and its
friends, which live in different libraries for the different systems.
"configure" tries several sets of "-l" linker options to find the right
libraries.
Some C++ compilers lack a built-in "bool" data type. "configure" attempts to
find a suitable definition which it passes to the compiler with
"-DBOOL_DEFINITION=...".
html2text requires a standard C++ library. Since some old C++ compilers do
not come with a standard C++ library, I have implemented parts of the library
in the 'libstd' subdirectory; the exported header files are in
'libstd/include'. For platforms which come with a (working) standard C++
library, the "home-grown" library in 'libstd' is not compiled and linked.
However, on some platforms, these libraries did not work. I don't know if this
is because the compiler or our 'libstd' library is broken, or because we have
some syntax errors in the source code files.
Some old GNU Standard C++ Library versions falsely re-names "list::erase()"
as "list::remove()". I fixed this with a "-DSTRING_ERASE=remove" on the "g++"
command line.
Some compilers do come with a Standard C++ Library, but the definition of the
"auto_ptr" template is missing from "<memory>", or is not standard-compliant.
This condition is checked by "configure" and "./libstd/include/auto_ptr.h" is
included if necessary (preprocessor symbol "HAS_WORKING_AUTO_PTR").
"make depend" attempts to run the MAKEDEPEND utility (not included in this
package). Unfortunately, most C++ compilers implicitly use some "secret"
include directories, which MAKEDEPEND doesn't know of; as a result,
MAKEDEPEND may issue some "could not find..." errors, which shouldn't worry
you. The "configure" script attempts to guess the "secret include directory"
and passes it with "-I" to MAKEDEPEND.
To generate 'HTMLParser.C' and 'HTMLParser.h' from 'HTMLParser.y' and
'HTMLParser.k' (which can be achieved with the 'make bison-local' directive),
you need bison++ version 2.2, written by Alain Coetmeur. Unfortunately, this
version is completely outdated and thus no longer being maintained or
supported in any way. As you will need it if you want to hack into the program,
you can find the program's sources on html2text's homepage.
However, the already generated 'HTMLParser.C' and 'HTMLParser.h' files are
included in html2text's source code package, enabling anybody to compile
html2text without having to install bison++-2.2 first.
## ===========================================================================
Martin Bayer <mbayer@zedat.fu-berlin.de>
## KNOWN_BUGS - Problems with html2text Sat Jul 5 12:40:24 CEST 2003
## ===========================================================================
# ( 0 )=======================================================================
# Feature requests
# ( 0.1 )---------------------------------------------------------------------
# Make HTTP implementation optional?
A corresponding compile-time option would enhance portability (no socket()
needed). Moreover, html2text's HTTP implementation is rather rudimentary, so
it could perhaps be eliminated completely. But I don't know how many users
prefer to have html2text fetch the files by itself instead of using it
within a pipe...
This however would lead to a complete re-write of the configure-script,
because we don't have any compile-time options yet.
# ( 1 )=======================================================================
# Bugs of minor severity
#
# The package fails to perform correctly under some conditions, or on some
# systems, or fails to comply with current policy documents.
# ( 1.1 )---------------------------------------------------------------------
# When parsing deeply nested TABLEs, html2text will run out of control.
This problem occurs on very complex tables with more than about 25 nested
TABLE elements, because the runtime increases exponentially with each nested
table.
Sorry, no fix for this within sight.
# ============================================================================
Martin Bayer <mbayer@zedat.fu-berlin.de>
# -----------------------------------------------------------------------------
#
# Portions Copyright (c) 1999 GMRS Software GmbH
# Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
# All rights reserved.
#
# Author: Arno Unkrig <arno@unkrig.de>
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License in the file
# COPYING for more details.
#
# -----------------------------------------------------------------------------
#
# Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
#
# -----------------------------------------------------------------------------
# Release number; passed to the compiler as -DVERSION=... through DEFINES.
VERSION=1.3.2a
# Parser generator and its flags; used by the 'bison-local' target.
BISONXX = bison++
YFLAGS =
# Program and destination directories used by the 'install' target.
INSTALLER = install
BINDIR = /usr/local/bin
MANDIR = /usr/local/man
DOCDIR = /usr/share/doc/html2text
# The @NAME@ values below are placeholders; presumably "./configure" fills
# them in when it generates the final Makefile from this file.
CXX = @CXX@
BOOL_DEFINITION = @BOOL_DEFINITION@
EXPLICIT = @EXPLICIT@
SYS_POLL_MISSING = @SYS_POLL_MISSING@
SOCKET_LIBRARIES = @SOCKET_LIBRARIES@
LIBSTDCXX_INCLUDES = @LIBSTDCXX_INCLUDES@
LIBSTDCXX_LIBS = @LIBSTDCXX_LIBS@
AUTO_PTR_BROKEN = @AUTO_PTR_BROKEN@
MAKEDEPEND_INCLUDES = @MAKEDEPEND_INCLUDES@
# Optimization and debug options; they end up in both CXXFLAGS and LDFLAGS.
DEBUG=-O2 -g
# Preprocessor settings: include paths plus the -D definitions.
INCLUDES = $(LIBSTDCXX_INCLUDES)
DEFINES = -DVERSION=$(VERSION) $(SYS_POLL_MISSING) $(BOOL_DEFINITION) $(EXPLICIT) $(AUTO_PTR_BROKEN)
CPPFLAGS = $(INCLUDES) $(DEFINES)
CXXFLAGS = $(CPPFLAGS) $(DEBUG)
LDFLAGS = $(DEBUG)
# Libraries appended to the link line of html2text.
LOADLIBES = $(LIBSTDCXX_LIBS) $(SOCKET_LIBRARIES)
# Suffix rule: compile a ".C" source file into the ".o" file with the same stem.
.SUFFIXES : .C .o
.C.o :
$(CXX) -c $(CXXFLAGS) $*.C
# -----------------------------------------------------------------------------
# 'default' simply forwards to 'all'.
default : all
# Build the executable, then print a hint about installing it by hand.
all : html2text
@echo ;
@echo 'Compilation completed. You may now move "html2text", "html2text.1.gz"';
@echo 'and "html2textrc.5.gz" to their installation directories (e.g.';
@echo '"/usr/local/bin", "/usr/local/man/man1" and "/usr/local/man/man5").';
@echo
# Object files that make up the html2text executable.
OBJS = html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o sgml.o table.o urlistream.o Properties.o cmp_nocase.o
# Link step; $(LIBSTDCXX_LIBS) is listed as a prerequisite as well as being
# part of $(LOADLIBES) on the link line.
html2text : $(OBJS) $(LIBSTDCXX_LIBS)
$(CXX) $(LDFLAGS) $(OBJS) $(LOADLIBES) $(LDLIBS) -o $@
# Build the library by running make inside the "libstd" subdirectory.
libstd/libstd.a :
cd libstd && $(MAKE)
# -----------------------------------------------------------------------------
# Since it is very unlikely that bison++-2.2 is installed (available on
# html2text's homepage), HTMLParser.h and HTMLParser.C are only built when
# 'make bison-local' is issued.
# The first command copies HTMLParser.k over HTMLParser.h unless "cmp" reports
# the two files as identical; the second runs $(BISONXX) on HTMLParser.y.
bison-local :
cmp -s HTMLParser.h HTMLParser.k || cp HTMLParser.k HTMLParser.h;
$(BISONXX) $(YFLAGS) -o HTMLParser.C -d -h HTMLParser.k HTMLParser.y
# -----------------------------------------------------------------------------
# This is mostly intended for RPM builds and users that don't read the documentation.
# Installs the executable (with -s) into $(BINDIR) and the two manual pages
# into $(MANDIR)/man1 and $(MANDIR)/man5, then creates $(DOCDIR) and copies
# the listed documentation files into it.
install :
$(INSTALLER) -s -m 755 html2text $(BINDIR);
$(INSTALLER) -m 644 html2text.1.gz $(MANDIR)/man1;
$(INSTALLER) -m 644 html2textrc.5.gz $(MANDIR)/man5;
$(INSTALLER) -d -m 755 $(DOCDIR);
$(INSTALLER) -p -m 644 CHANGES COPYING CREDITS KNOWN_BUGS README RELEASE_NOTES TODO $(DOCDIR)
# -----------------------------------------------------------------------------
# Subdirectories visited by the recursive targets below.
SUBDIRS = libstd
# "./configure" creates "Makefile"s only in the subdirectories that need to
# be built, so we check for the existence of these "Makefile"s.
# For every subdirectory with a readable Makefile, the same target ($@) is
# run there inside a subshell; a failure in any subdirectory ends the loop
# with "exit 1".
clean clobber depend :
@for i in $(SUBDIRS); do \
if test -r $$i/Makefile; then \
( \
cd $$i && echo "*** make $@ in `pwd`" && $(MAKE) $@ || \
{ echo "*** make $@ error in `pwd`" && false; } \
) || exit 1; \
echo "*** Back in `pwd`"; \
fi; \
done;
# In addition to the subdirectory loop, 'clean', 'clobber' and 'depend' each
# depend on a "local-" target that does the work in this directory.
clean : local-clean
# Remove object files, "*~" files, "core" and the html2text executable.
local-clean :
rm -f *.o *~ core html2text;
clobber : local-clobber
# NOTE(review): local-clean already removes html2text, so the rm below looks
# redundant -- confirm before changing.
local-clobber : local-clean
rm -f html2text
depend : local-depend
# Truncate (or create) the "Dependencies" file, run makedepend over all *.C
# files with that file as its output, then delete the "Dependencies.bak" file.
local-depend : HTMLParser.h
@>Dependencies
makedepend -f Dependencies $(CPPFLAGS) $(MAKEDEPEND_INCLUDES) *.C
@rm -f Dependencies.bak
# -----------------------------------------------------------------------------
# Pull in the dependency rules written by 'make depend'.
include Dependencies
/***************************************************************************/
/*
* Portions Copyright (c) 1999 GMRS Software GmbH
* Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
* All rights reserved.
*
* Author: Arno Unkrig <arno@unkrig.de>
*/
/* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License in the file COPYING for more details.
*/
/***************************************************************************/
/*
* Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
* Dates and reasons of modifications:
* Fre Jun 8 17:24:35 CEST 2001: new method
* Wed Jul 2 22:02:51 CEST 2003: ported to g++ 3.3
*/
/***************************************************************************/
#include <ctype.h>
#include <iostream>
#include "Properties.h"
/* ------------------------------------------------------------------------- */
/*
 * Look up "key" in the property map.
 *
 * Returns the stored value as a C string, or "dflt" when no entry exists for
 * "key" or when "key" is a null pointer. A returned value points into the
 * string stored in the map (via c_str()); the caller must not free it.
 */
const char *
Properties::getProperty(const char *key, const char *dflt) const
{
  // The lookup converts "key" to a std::string, and constructing a
  // std::string from a null pointer is undefined behavior. A null key cannot
  // name an entry anyway, so treat it as "not found".
  if (!key) return dflt;

  map<string, string>::const_iterator i = property_map.find(key);
  if (i == property_map.end()) return dflt;
  return i->second.c_str();
}
// New method for empty attributes - Johannes Geiger
/*
 * Look up "key" in the property map.
 *
 * Returns the stored value as a C string, or NULL when no entry exists for
 * "key" or when "key" is a null pointer. Because a missing entry yields
 * NULL, the caller can tell it apart from an entry whose value is the empty
 * string. A returned value points into the string stored in the map (via
 * c_str()); the caller must not free it.
 */
const char *
Properties::getProperty(const char *key) const
{
  // The lookup converts "key" to a std::string, and constructing a
  // std::string from a null pointer is undefined behavior. A null key cannot
  // name an entry anyway, so treat it as "not found".
  if (!key) return NULL;

  map<string, string>::const_iterator i = property_map.find(key);
  if (i == property_map.end()) return NULL;
  return i->second.c_str();
}
/* ------------------------------------------------------------------------- */