#!/usr/bin/env python3

import argparse
import os
import re
import xml.etree.ElementTree as ET
from datetime import date, datetime
from pathlib import Path
from urllib.parse import urljoin

import requests

from utils.yaml import load_meta_from_dict, yaml_dump


def parse_date(datestr):
    """Turn a dash-separated ``year-month-day`` string into a ``date``.

    The string is split on ``-`` and every piece is converted with ``int``
    before being handed to the ``date`` constructor positionally.
    """
    fields = map(int, datestr.split('-'))
    return date(*fields)


def parse_datetime(datestr):
    """Parse a ``YYYY-MM-DD HH:MM:SS+00`` timestamp into a ``datetime``.

    The trailing ``+00`` is matched as literal text, so the returned value
    carries no tzinfo.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S+00'
    return datetime.strptime(datestr, timestamp_format)


def split_speakers(speakers):
    """Yield one stripped name per speaker from an iterable of strings.

    The entries are first joined with commas, then broken apart again on
    ``,`` and on the literal `` and ``, so both separators may appear
    within a single entry.  An empty iterable yields one empty string.
    """
    joined = ','.join(speakers)
    yield from (
        name.strip()
        for chunk in joined.split(',')
        for name in chunk.split(' and ')
    )


def read_dc_schedule(url):
    """Download a schedule XML document and index its events.

    Args:
        url: Address of the XML schedule to fetch.

    Returns:
        A dict mapping the text of each ``<event>``'s ``conf_url`` child to
        the text of its ``description`` child.  Either value is ``None``
        when the corresponding child element is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(url)
    # Fail loudly on an HTTP error rather than feeding an error page to the
    # XML parser.
    r.raise_for_status()
    schedule = ET.fromstring(r.content)
    return {
        event.findtext('conf_url'): event.findtext('description')
        for event in schedule.findall('.//event')
    }


def debconf(meta, wafer_base_url):
    """Fill in DebConf conference details and per-video schedule data.

    Nothing happens unless ``wafer_base_url`` is set and has the form
    ``https://debconf<N>.debconf.org/``.  For such a URL the conference
    fields on ``meta`` are populated and every video is matched against the
    site's Pentabarf schedule.

    Each video's ``description`` is expected to hold the event's conference
    URL on entry (stashed there earlier as a lookup key).  When the key is
    found in the schedule, the description is replaced by the schedule's
    description and ``details_url`` is set; otherwise the description
    attribute is deleted.

    Args:
        meta: Metadata object exposing ``conference`` and ``videos``.
        wafer_base_url: Base URL of the wafer site, or a falsy value.
    """
    if not wafer_base_url:
        return
    m = re.match(r'^https://debconf(\d+)\.debconf\.org/$', wafer_base_url)
    if not m:
        # Not a DebConf site: nothing to fill in, so don't fetch anything.
        return
    edition = int(m.group(1))

    pentabarf_url = urljoin(wafer_base_url, 'schedule/pentabarf.xml')
    schedule = read_dc_schedule(pentabarf_url)

    meta.conference.series = 'DebConf'
    meta.conference.edition = str(edition)
    meta.conference.location = 'UNKNOWN'
    meta.conference.website = wafer_base_url
    meta.conference.schedule = urljoin(wafer_base_url, '/schedule/')
    meta.conference.video_base = (
        'https://meetings-archive.debian.net/pub/debian-meetings/'
        '{}/DebConf{}/'.format(2000 + edition, edition))

    for video in meta.videos:
        conf_url = video.description  # Hack, we stashed this there
        if conf_url in schedule:
            video.description = schedule[conf_url]
            video.details_url = urljoin(wafer_base_url, conf_url)
        else:
            del video.description


def fixup(json, wafer_base_url):
    """Normalise the decoded SReview JSON in place and return it.

    The following adjustments are made:

    * ``conference.date`` strings are parsed into ``date`` objects.
    * ``conference.video_formats`` is turned from a list of single-key
      dicts into one dict keyed by format name.  Integer bitrates become
      strings with a ``k`` suffix, a missing ``DebConfLQ`` bitrate defaults
      to ``256k``, and ``DebConfLQ`` is renamed to ``lq``.
    * For each video: ``eventid`` is either stashed in ``description``
      (when ``wafer_base_url`` is set) or dropped; the description is
      stripped and has whitespace around newlines collapsed, or is removed
      when ``None``; speakers are split into individual names; ``start``
      and ``end`` are parsed; the leading directory is stripped from
      ``video`` and an ``lq`` entry is recorded in ``alt_formats``.
    * Videos are sorted by ``(start, room)``.

    Args:
        json: The decoded JSON document (a dict with ``conference`` and
            ``videos`` keys).
        wafer_base_url: Wafer site URL, or a falsy value when not in use.

    Returns:
        The same ``json`` object, modified in place.
    """
    conference = json['conference']

    # We parsed as JSON, dates weren't parsed
    conference['date'] = [
        parse_date(datestr) for datestr in conference['date']]

    # https://github.com/yoe/sreview/issues/59
    video_formats = {}
    for item in conference['video_formats']:
        # Each item is a dict whose first key is the format's name
        name = list(item.keys())[0]
        format_ = item[name]

        if isinstance(format_['bitrate'], int):
            format_['bitrate'] = str(format_['bitrate']) + 'k'
        elif format_['bitrate'] is None:
            if name == 'DebConfLQ':
                format_['bitrate'] = '256k'

        if name == 'DebConfLQ':
            name = 'lq'

        video_formats[name] = format_

    conference['video_formats'] = video_formats

    lq_suffix = '.lq.webm'
    for video in json['videos']:
        if wafer_base_url:
            # temporarily stash ID in description, so we can find it later
            video['description'] = video.pop('eventid')
        else:
            del video['eventid']
        if video['description'] is None:
            del video['description']
        else:
            description = video['description'].strip()
            video['description'] = re.sub(r'\s*\n\s*', '\n', description)

        video['speakers'] = list(split_speakers(video['speakers']))
        if video['speakers'] == ['']:
            # split_speakers yields a lone empty string for "no speakers"
            del video['speakers']
        video['start'] = parse_datetime(video['start'])
        video['end'] = parse_datetime(video['end'])

        video_url = video['video']
        # Strip prefix dir, we can include it in video_base
        video_url = video_url.split('/', 1)[1]
        if video_url.endswith(lq_suffix):
            video['alt_formats'] = {
                'lq': video_url,
            }
            # Swap only the trailing suffix; str.replace would also rewrite
            # an earlier '.lq.webm' occurring elsewhere in the path.
            video_url = video_url[:-len(lq_suffix)] + '.webm'
        else:
            video['alt_formats'] = {
                'lq': video_url.rsplit('.', 1)[0] + lq_suffix,
            }
        video['video'] = video_url

    json['videos'].sort(key=lambda v: (v['start'], v['room']))

    return json


def re_serialize(meta, path):
    """Write ``meta`` to ``path`` as YAML via a temporary file.

    If every video has a ``start`` attribute, the videos are first sorted
    by it.  The YAML is written to a hidden sibling of ``path``, flushed
    and fsynced, and only then moved over the destination with
    ``os.replace``.  A leftover temporary file is removed if anything
    fails along the way.

    Args:
        meta: Metadata object exposing ``videos``; passed to ``yaml_dump``.
        path: ``pathlib.Path`` of the file to produce.
    """
    if all(hasattr(event, 'start') for event in meta.videos):
        meta.videos.sort(key=lambda event: event.start)

    # Derive the temporary name from the target so that two outputs written
    # into the same directory don't share one temporary file.
    tmp_path = path.with_name('.{}.new'.format(path.name))
    try:
        with tmp_path.open('w', encoding='utf8') as f:
            yaml_dump(meta, f)
            f.flush()
            os.fsync(f.fileno())
        # Rename only once the file has been closed.
        os.replace(str(tmp_path), str(path))
    finally:
        if tmp_path.exists():
            tmp_path.unlink()


def main():
    """Download the SReview JSON, clean it up and save it as YAML.

    Command-line arguments:
        output: File to produce.
        -u/--url: SReview ``/released`` URL to download.
        -w/--wafer: Optional wafer conference site URL.

    Raises:
        Exception: If the SReview URL does not answer with HTTP 200.
    """
    parser = argparse.ArgumentParser(
        description='Download sreview JSON, save as YAML')
    parser.add_argument('output', help='File to produce')
    parser.add_argument(
        '-u', '--url', default='https://sreview.debian.net/released',
        help='SReview /released URL')
    parser.add_argument(
        '-w', '--wafer', help='Wafer conference site URL')
    args = parser.parse_args()

    if args.wafer:
        # Normalise to exactly one trailing slash so that relative URLs
        # joined onto it resolve beneath the site root.
        args.wafer = args.wafer.rstrip('/') + '/'

    r = requests.get(args.url)
    if r.status_code != 200:
        raise Exception('HTTP {}'.format(r.status_code))

    released = fixup(r.json(), args.wafer)
    meta = load_meta_from_dict(released)

    debconf(meta, args.wafer)

    re_serialize(meta, Path(args.output))


# Run the command-line entry point only when executed as a script,
# not when imported as a module.
if __name__ == '__main__':
    main()