#!/usr/bin/python3
#
# This file is part of FreedomBox.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import re
import urllib.request

# URL template for a single manual page; '{}' is filled with the page name.
# The query string asks the wiki to render the page with the text/docbook
# MIME type.
MANUAL_PAGE_URL = "https://wiki.debian.org/FreedomBox/Manual/{}?action=show&mimetype=text%2Fdocbook"

# URL of the manual index page in raw wiki markup.
MANUAL_INDEX_RAW_URL = "https://wiki.debian.org/FreedomBox/Manual?action=raw"

# Page names found in the index; rebound by list_manual_pages().
manual_pages = []

# Page names dropped from manual_pages after the index has been parsed.
to_remove = ['QuickStart', 'GettingHelp', 'Developer']


def list_manual_pages():
    """Fetch the list of manual pages and write them to a file.

    Downloads the raw wiki markup of the manual index, takes the first
    ``FreedomBox/Manual/<Page>`` reference found on each line, drops every
    page named in ``to_remove`` and stores the remaining names in the
    module-level ``manual_pages`` list. The names are also written, one per
    line, to ``manual-pages.list`` in the current working directory.

    Returns:
        The list of page names that was stored in ``manual_pages``.

    Raises:
        urllib.error.URLError: If the index cannot be downloaded.
    """
    global manual_pages

    # Raw string: a bare '\w' in a normal literal is an invalid escape.
    pattern = re.compile(r'FreedomBox/Manual/(\w+)')

    pages = []
    # The context manager closes the response even if decoding fails.
    with urllib.request.urlopen(MANUAL_INDEX_RAW_URL) as response:
        for raw_line in response:
            match = pattern.search(raw_line.decode())
            if match:
                pages.append(match.group(1))

    # Filter instead of list.remove(): remove() drops only the first
    # occurrence, so an excluded page linked twice in the index would
    # otherwise stay in the list.
    excluded = set(to_remove)
    manual_pages = [page for page in pages if page not in excluded]

    # Explicit encoding so the file does not depend on the caller's locale.
    with open('manual-pages.list', 'w', encoding='utf-8') as lst_file:
        lst_file.write('\n'.join(manual_pages))

    return manual_pages


def fetch_manual_pages_in_docbook_format():
    """Download every page listed in ``manual_pages``.

    Each page name is substituted into ``MANUAL_PAGE_URL`` and the response
    is saved as ``<page>.raw.xml`` in the current working directory.

    Raises:
        urllib.error.URLError: If a page cannot be downloaded.
    """
    # Local import so this function does not rely on urllib.request having
    # loaded urllib.parse as a side effect.
    from urllib.parse import quote

    for page in manual_pages:
        # Percent-encode the name: non-ASCII characters cannot be sent in a
        # request line. ASCII letters, digits and '_' pass through unchanged.
        url = MANUAL_PAGE_URL.format(quote(page))
        filename = '{}.raw.xml'.format(page)
        urllib.request.urlretrieve(url, filename)


def main():
    """Build the list of manual pages, then download each listed page."""
    list_manual_pages()
    fetch_manual_pages_in_docbook_format()


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()