Skip to content
Snippets Groups Projects
rss2html.py 2.27 KiB
Newer Older
  • Learn to ignore specific revisions
  • """
    A demo that reads in an RSS XML document and emits an HTML file containing
    a list of the individual items in the feed.
    """
    
    import sys
    import codecs
    import io
    
    from xml.sax import make_parser, handler
    from xml.sax.saxutils import escape
    
    # --- Templates
    
    # Opening part of the generated HTML page.  It has two %s slots: the first
    # fills the <title> element and the second the <h1> heading.  The <meta>
    # tag declares the page as UTF-8.
    top = """\
    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
    <html>
    <head>
      <title>%s</title>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    </head>
    
    <body>
    <h1>%s</h1>
    """
    
    # Closing part of the generated HTML page: ends the <ul> list of items,
    # adds a short footer and closes <body> and <html>.  It contains no format
    # placeholders.
    bottom = """
    </ul>
    
    <hr>
    <address>
    Converted to HTML by rss2html.py.
    </address>
    
    </body>
    </html>
    """
    
    # --- The ContentHandler
    
    class RSSHandler(handler.ContentHandler):
        """SAX content handler that renders an RSS feed as an HTML page.

        Output is produced incrementally while the document is parsed:

        * the end of the channel ``<title>`` writes the page header (``top``),
        * the end of the channel ``<description>`` writes a ``<p>`` paragraph,
        * the end of every ``<item>`` writes one ``<li>`` entry (the ``<ul>``
          is opened by the first item),
        * the end of the ``<rss>`` root element writes the page footer
          (``bottom``).

        Because output is streamed, pieces appear in the order the feed
        provides them.

        Parameters:
            out: stream that receives the UTF-8 encoded HTML.  It may be a
                byte stream, or a text stream (such as ``sys.stdout``); see
                ``_utf8_writer`` for how each kind is handled.
        """

        # Elements that act as a context for the simple elements inside them.
        _CONTAINERS = ("channel", "image", "item")

        def __init__(self, out=sys.stdout):
            handler.ContentHandler.__init__(self)
            self._out = self._utf8_writer(out)

            self._text = ""            # character data since the last start tag
            self._parent_stack = []    # currently open container elements
            self._parent = None        # innermost open container, or None
            self._list_started = False  # True once "<ul>" has been written
            self._title = None         # title of the item being read
            self._link = None          # link of the item being read
            self._descr = ""           # description of the item being read

        @staticmethod
        def _utf8_writer(out):
            """Return an object whose ``write`` accepts text destined for *out*.

            Text streams cannot take the bytes a codecs writer produces, so for
            them the underlying byte buffer is wrapped instead; that keeps the
            output UTF-8 as the page header promises.  A text stream without a
            byte buffer (e.g. ``io.StringIO``) is written to directly.  Any
            other object is treated as a byte stream.
            """
            if isinstance(out, io.TextIOBase):
                raw = getattr(out, "buffer", None)
                if raw is None:
                    return out
                # Keep ordering with anything already written to the wrapper.
                out.flush()
                out = raw
            return codecs.getwriter('utf-8')(out)

        # ContentHandler methods

        def startElement(self, name, attrs):
            """Enter *name*; containers become the current parent."""
            if name in self._CONTAINERS:
                self._parent_stack.append(name)
                self._parent = name

            # Text is collected per element, so start afresh on every tag.
            self._text = ""

        def endElement(self, name):
            """Handle the text collected for *name* and emit HTML when due."""
            if self._parent == "channel":
                if name == "title":
                    # Titles are plain text: escape markup characters.
                    heading = escape(self._text)
                    self._out.write(top % (heading, heading))
                elif name == "description":
                    # NOTE(review): descriptions are inserted as markup,
                    # unescaped, since RSS descriptions commonly carry HTML.
                    # Sanitize here if the feed is not trusted.
                    self._out.write("<p>%s</p>\n" % self._text)

            elif self._parent == "item":
                if name == "title":
                    self._title = self._text
                elif name == "link":
                    self._link = self._text
                elif name == "description":
                    self._descr = self._text
                elif name == "item":
                    self._write_item()

            # Leaving a container restores the enclosing one, so elements that
            # follow a nested <image> or <item> are attributed correctly.
            if self._parent_stack and self._parent_stack[-1] == name:
                self._parent_stack.pop()
                if self._parent_stack:
                    self._parent = self._parent_stack[-1]
                else:
                    self._parent = None

            if name == "rss":
                footer = bottom
                if not self._list_started:
                    # No item opened the list: drop the footer's closing tag
                    # so the page does not contain an unmatched </ul>.
                    footer = footer.replace("</ul>", "", 1)
                self._out.write(footer)

        def characters(self, content):
            """Accumulate character data; the parser may deliver it in pieces."""
            self._text = self._text + content

        # Internal helpers

        def _write_item(self):
            """Write the collected item as a list entry and reset item state."""
            if not self._list_started:
                self._out.write("<ul>\n")
                self._list_started = True

            link = (self._link or "").strip()
            # An item without a title is labelled with its link instead.
            if self._title is not None:
                label = escape(self._title)
            else:
                label = escape(link)

            if link:
                # The link lands in a double-quoted attribute, so quotes must
                # be escaped as well.
                entry = '<a href="%s">%s</a>' % (
                    escape(link, {'"': "&quot;"}), label)
            else:
                entry = label

            # Item descriptions are inserted as markup (see endElement).
            self._out.write('  <li>%s %s\n' % (entry, self._descr))

            self._title = None
            self._link = None
            self._descr = ""
    
    # --- Main program
    
    if __name__ == '__main__':
        # The only argument is the RSS document to convert; it is handed
        # unchanged to the SAX parser's parse() call.  The HTML goes to
        # standard output (RSSHandler's default).
        if len(sys.argv) < 2:
            sys.stderr.write("usage: %s <rss-document>\n" % sys.argv[0])
            sys.exit(2)

        parser = make_parser()
        parser.setContentHandler(RSSHandler())
        parser.parse(sys.argv[1])