Commit a4bf4abb authored by Jochen Sprickerhof, committed by Holger Levsen
Browse files

reproduce.debian.net: add rebuilder_stats.py

parent 96050e1d
Loading
Loading
Loading
Loading

bin/rebuilder_stats.py

0 → 100644
+119 −0
Original line number Diff line number Diff line
#!/usr/bin/python3

from collections import defaultdict
from datetime import datetime
from re import search
from sqlite3 import connect
from sys import argv


def _regexp(pattern, text):
    """Back the SQL ``regexp`` operator with :func:`re.search`.

    SQLite rewrites ``X regexp Y`` into the call ``regexp(Y, X)``, so the
    pattern arrives as the first argument.  A NULL operand yields NULL
    (``None``) instead of raising ``TypeError`` inside ``search``, which would
    abort the whole query for packages that have no build log.
    """
    if pattern is None or text is None:
        return None
    return 1 if search(pattern, text) else 0


def _error_messages(arch):
    """Return the SQL condition -> label table used to classify BAD packages.

    Keys are WHERE-clause fragments over ``packages p`` / ``builds b``; values
    are the headings shown in the report.  Several fragments may share one
    label.  *arch* is interpolated into the ``buildinfo_<arch>.gz`` patterns.
    """
    log = "CAST(b.build_log AS TEXT)"
    r_packages = "p.name like 'r-cran-%' or p.name like 'r-bioc-%' or p.name like 'r-other-%'"

    return {
        f"{log} like '%rebuilderd: unexpected error while rebuilding package:"
        " Failed to download build input from%'": "buildinfo file 404 (maybe temporary)",
        f"{log} like '%rebuilderd: unexpected error while rebuilding package:"
        " Failed to download original package from%'": "package file 404 (temporary)",
        f"{log} like '%debsnap: fatal error at line 27%'": "debsnap failed (temporary)",
        f"{log} like '%debsnap: No source files found for%'": "debsnap failed (temporary)",
        f"{log} like '%cannot find:%debootsnap failed"
        " at /usr/bin/debrebuild line 48%'": "packages missing on metasnap (maybe temporary)",
        f"{log} not like '%cannot find:%' and {log} like '%debootsnap failed"
        " at /usr/bin/debrebuild line 48%'": "debootsnap failed (maybe temporary)",
        f"{log} like '%Validation FAILED!!%'": "dscverify failed (temporary)",
        f"{log} like '%400 URL must be absolute"
        "_E: Could not download%sbuild failed%'": "download failed (temporary)",
        f"{log} like '%E: Error creating chroot session: skipping%'": "sbuild chroot failed (temporary)",
        f"{log} like '%TRUNCATED DUE TO TIMEOUT: %'"
        f" and {log} like '%inputs/freedict_20%'": "timeout: freedict #998683",
        f"{log} like '%TRUNCATED DUE TO TIMEOUT: %'"
        f" and {log} not like '%inputs/freedict_20%'": "timeout",
        f"{log} like '%TRUNCATED DUE TO SIZE LIMIT: %'": "size limit",
        f"{log} like '%fakeroot not found, either install the fakeroot%'"
        fr" and {log} regexp 'dpkg is already the newest version \(1\.1[0-8]'": "old dpkg",
        f"{log} like '%fakeroot not found, either install the fakeroot%'"
        fr" and not {log} regexp 'dpkg is already the newest version \(1\.1[0-8]'": "fakeroot not found",
        f"{log} not like '%fakeroot not found, either install the fakeroot%'"
        f" and not ({r_packages})"
        f" and {log} like '%E: Build failure (dpkg-buildpackage died)%'": "dpkg-buildpackage failed",
        "b.diffoscope like '%TRUNCATED DUE TO TIMEOUT: 600 seconds%'": "diffoscope timeout",
        f"not ({r_packages}) and (b.diffoscope is null or"
        f" (b.diffoscope not like '%buildinfo_{arch}.gz%' and"
        " b.diffoscope not like '%buildinfo_all.gz%' and"
        " b.diffoscope not like '%TRUNCATED DUE TO TIMEOUT: 600 seconds%'))"
        fr" and ({log} regexp 'checking [^ ]*: $'"
        fr" or {log} regexp 'checking [^ ]*: size differs for [^ ]*$'"
        fr" or {log} regexp 'checking [^ ]*: size... $'"
        fr" or {log} regexp 'checking [^ ]*: size... value of [^ ]* differs for [^ ]*$')": "failed to reproduce",
        f"{log} like '%rebuilderd: unexpected error while rebuilding package:"
        " Failed to run diffoscope: No such file or directory (os error 2)%'": "diffoscope not found (fixed)",
        r_packages: "failed to reproduce: R package #1089197",
        f"b.diffoscope like '%buildinfo_{arch}.gz%' or b.diffoscope like '%buildinfo_all.gz%'": "dh_buildinfo",
        f"({log} like '%.deb: size... md5... sha256... sha1... all OK_' or {log} like '%.deb: size... md5... sha1... sha256... all OK_')": "rebuilderd error",
    }


def main(arch: str = "", db_path: str = "/var/lib/jenkins/rebuilderd.db") -> None:
    """Print an HTML report grouping BAD packages by the reason they failed.

    Every package with status ``BAD`` is matched against the SQL conditions
    from ``_error_messages``; packages matching none of them are listed under
    "other errors".  The report (summary table plus one section of links per
    reason) is written to standard output.

    Args:
        arch: Architecture name used in the page title, the link host names
            and the ``buildinfo_<arch>.gz`` patterns.  When empty, it is taken
            from the first command-line argument.
        db_path: Path of the rebuilderd SQLite database to read.

    Raises:
        SystemExit: If no architecture is given and none is on the command line.
    """
    if not arch:
        if len(argv) < 2:
            raise SystemExit("usage: rebuilder_stats.py ARCH")
        arch = argv[1]

    messages_packages = defaultdict(list)
    package_logs = {}
    bad_packages = set()
    has_diffoscope = set()

    cx = connect(db_path)
    try:
        cx.create_function("regexp", 2, _regexp)
        cu = cx.cursor()

        # (label, package) pairs already recorded: several conditions share a
        # label, and a package matching more than one must be counted once.
        seen = set()
        for error, message in _error_messages(arch).items():
            # The condition is parenthesised because some fragments contain a
            # top-level "or"; without the parentheses "and" binds tighter and
            # the status filter would only apply to the first alternative.
            rows = cu.execute(
                "SELECT p.name FROM packages p LEFT JOIN builds b ON b.id = p.build_id"
                f" WHERE p.status = 'BAD' and ({error})"
            )
            for (name,) in rows:
                if (message, name) not in seen:
                    seen.add((message, name))
                    messages_packages[message].append(name)

        for name, build_id, diffoscope in cu.execute(
            "SELECT p.name, p.build_id, p.has_diffoscope FROM packages p WHERE p.status ='BAD'"
        ):
            bad_packages.add(name)
            # A missing build id falls back to 0, the same default the link
            # formatting below uses for unknown packages.
            package_logs[name] = int(build_id) if build_id is not None else 0
            if diffoscope:
                has_diffoscope.add(name)
    finally:
        # Closing explicitly: the connection is not released by itself until
        # the object is garbage collected.
        cx.close()

    found_packages = {pkg for lst in messages_packages.values() for pkg in lst}

    other_errors = list(bad_packages.difference(found_packages))
    if other_errors:
        messages_packages["other errors"] = other_errors

    print(
        '<!DOCTYPE html><html lang="en"><head>'
        '<meta charset="utf-8">'
        f"<title>https://{arch}.reproduce.debian.net/ stats</title>"
        '<meta name="viewport" content="width=device-width, initial-scale=1">'
        "</head><body>"
        f"<header><h1>https://{arch}.reproduce.debian.net/ stats</h1></header> <main>"
    )
    print(f"Last changed: {datetime.now().isoformat()}")

    # Summary table: one row per reason, linking to its section below.
    print("<table> <tr> <th>error</th> <th>number of affected bad packages</th> </tr>")
    for message, packages in messages_packages.items():
        anchor = message.replace(" ", "-")
        print(
            f'<tr><td><a href="#{anchor}">{message}</a></td>'
            f'<td>{len(packages)} ({len(packages)/len(bad_packages)*100:.2f}%)</td></tr>'
        )
    print("</table>")

    def format_link(pkg):
        """Return the build-log and tracker links (plus diffoscope, if any) for *pkg*."""
        link = f'<a href="https://{arch}.reproduce.debian.net/api/v0/builds/{package_logs.get(pkg, 0)}/log">{pkg}</a><a href="https://tracker.debian.org/pkg/{pkg}">🍥</a>'
        if pkg in has_diffoscope:
            return f'{link}<a href="https://{arch}.reproduce.debian.net/api/v0/builds/{package_logs.get(pkg, 0)}/diffoscope">💠</a>'
        return link

    # Detail sections: packages of each reason, ordered by build id.
    for message, packages in messages_packages.items():
        anchor = message.replace(" ", "-")
        print(f'<h2 id="{anchor}">{message}</h2>')
        packages = sorted(packages, key=lambda pkg: package_logs.get(pkg, 0))
        print(" ".join(format_link(pkg) for pkg in packages))

    print("</main></body></html>")


# Generate the report only when this file is executed as a script, not when
# it is imported; main() takes the architecture from the command line.
if __name__ == "__main__":
    main()