reproducible_db_maintenance.py 33 KB
Newer Older
1 2 3
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
4
# Copyright © 2015-2018 Mattia Rizzolo <mattia@mapreri.org>
5
# Copyright © 2015 Holger Levsen <holger@layer-acht.org>
6
# Based on various reproducible_* files © 2014-2015 Holger Levsen <holger@layer-acht.org>
7 8 9 10
# Licensed under GPL-2
#
# Depends: python3
#
11
# Track the database schema and changes to it. Also allow simple creation
12 13
# and migration of it.

14 15 16
import re
import sys
from datetime import datetime
17
from sqlalchemy.orm import sessionmaker
18 19 20

from rblib import query_db
from rblib.confparse import log
21
from rblib.const import DB_ENGINE, DB_METADATA
22
from rblib.utils import print_critical_message
23

24
now = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
25

26

27 28 29 30 31 32 33 34
# the original schema is here
db_schema = [
    {
        'name': 'rb_schema',
        'query': ['''CREATE TABLE rb_schema
                     (version INTEGER NOT NULL,
                      date TEXT NOT NULL,
                      PRIMARY KEY (version))''',
35
                  "INSERT INTO rb_schema VALUES (1, '" + now + "')"]
36 37 38 39 40 41 42 43 44
    },
    {
        'name': 'source_packages',
        'query': ['''CREATE TABLE source_packages
                     (name TEXT NOT NULL,
                      version TEXT NOT NULL,
                      status TEXT NOT NULL
                      CHECK
                        (status IN
45 46
                            ('blacklisted', 'FTBFS', 'reproducible',
                             'unreproducible', '404', 'not for us')
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
                        ),
                      build_date TEXT NOT NULL,
                      PRIMARY KEY (name))''']
    },
    {
        'name': 'sources_scheduled',
        'query': ['''CREATE TABLE sources_scheduled
                     (name TEXT NOT NULL,
                      date_scheduled TEXT NOT NULL,
                      date_build_started TEXT NOT NULL,
                      PRIMARY KEY (name))''']
    },
    {
        'name': 'sources',
        'query': ['''CREATE TABLE sources
                     (name TEXT NOT NULL,
                      version TEXT NOT NULL)''']
    },
    {
        'name': 'stats_pkg_state',
        'query': ['''CREATE TABLE stats_pkg_state
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      untested INTEGER,
                      reproducible INTEGER,
                      unreproducible INTEGER,
                      FTBFS INTEGER,
                      other INTEGER,
                      PRIMARY KEY (datum))''']
    },
    {
        'name': 'stats_builds_per_day',
        'query': ['''CREATE TABLE stats_builds_per_day
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      reproducible INTEGER,
                      unreproducible INTEGER,
                      FTBFS INTEGER,
                      other INTEGER,
                      PRIMARY KEY (datum))''']
    },
    {
        'name': 'stats_builds_age',
        'query': ['''CREATE TABLE stats_builds_age
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      oldest_reproducible REAL,
                      oldest_unreproducible REAL,
                      oldest_FTBFS REAL,
                      PRIMARY KEY (datum))''']
    },
    {
        'name': 'stats_bugs',
        'query': ['''CREATE TABLE stats_bugs
                     (datum TEXT NOT NULL,
                      open_toolchain INTEGER,
                      done_toolchain INTEGER,
                      open_infrastructure INTEGER,
                      done_infrastructure INTEGER,
                      open_timestamps INTEGER,
                      done_timestamps INTEGER,
                      open_fileordering INTEGER,
                      done_fileordering INTEGER,
                      open_buildpath INTEGER,
                      done_buildpath INTEGER,
                      open_username INTEGER,
                      done_username INTEGER,
                      open_hostname INTEGER,
                      done_hostname INTEGER,
                      open_uname INTEGER,
                      done_uname INTEGER,
                      open_randomness INTEGER,
                      done_randomness INTEGER,
                      open_buildinfo INTEGER,
                      done_buildinfo INTEGER,
                      open_cpu INTEGER,
                      done_cpu INTEGER,
124 125 126 127
                      open_signatures INTEGER,
                      done_signatures INTEGER,
                      open_environment INTEGER,
                      one_environment INTEGER,
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
                      PRIMARY KEY (datum))''']
    },
    {
        'name': 'stats_notes',
        'query': ['''CREATE TABLE stats_notes
                     (datum TEXT NOT NULL,
                      packages_with_notes INTEGER,
                      PRIMARY KEY (datum))''']
    },
    {
        'name': 'stats_issues',
        'query': ['''CREATE TABLE stats_issues
                     (datum TEXT NOT NULL,
                      known_issues INTEGER,
                      PRIMARY KEY (datum))''']
    },
    {
        'name': 'stats_meta_pkg_state',
        'query': ['''CREATE TABLE stats_meta_pkg_state
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      meta_pkg TEXT NOT NULL,
                      reproducible INTEGER,
                      unreproducible INTEGER,
                      FTBFS INTEGER,
                      other INTEGER,
                      PRIMARY KEY (datum, suite, meta_pkg))''']
    }
]

158
# and here are some queries, split by update, that can be used to
159 160
# update the live schema
schema_updates = {
161
    1: [],
162 163 164 165 166 167 168 169
    2: [  # do a funny dance to add an id, suite and architecture values to
          # the `suites` table
        '''CREATE TABLE sources_new_tmp
           (id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            version TEXT NOT NULL,
            suite TEXT,
            architecture TEXT,
170
            UNIQUE (name, suite, architecture))''',
171 172 173 174 175 176
        '''CREATE TABLE sources_new
           (id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            version TEXT NOT NULL,
            suite TEXT NOT NULL,
            architecture TEXT NOT NULL,
177
            UNIQUE (name, suite, architecture))''',
178
        'INSERT INTO sources_new_tmp (name, version) SELECT * FROM sources',
179
        "UPDATE sources_new_tmp SET suite='sid', architecture='amd64'",
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
        'INSERT INTO sources_new SELECT * FROM sources_new_tmp',
        'DROP TABLE sources_new_tmp',
        'DROP TABLE sources',
        'ALTER TABLE sources_new RENAME TO sources',
        # now that we have an id in `sources` rework all tables to join
        # against this table, and avoid duplicating data
        # `schedule`:
        '''CREATE TABLE schedule
           (id INTEGER PRIMARY KEY,
            package_id INTEGER NOT NULL,
            date_scheduled TEXT NOT NULL,
            date_build_started TEXT NOT NULL,
            save_artifacts INTEGER DEFAULT 0,
            UNIQUE (package_id),
            FOREIGN KEY(package_id) REFERENCES sources(id))''',
        '''INSERT INTO schedule (package_id, date_scheduled, date_build_started)
           SELECT s.id, p.date_scheduled, p.date_build_started
           FROM sources AS s JOIN sources_scheduled AS p ON s.name = p.name''',
        'DROP TABLE sources_scheduled',
        # `results`:
        '''CREATE TABLE results
           (id INTEGER PRIMARY KEY,
            package_id INTEGER NOT NULL,
            version TEXT NOT NULL,
            status TEXT,
            build_date TEXT,
            build_duration TEXT DEFAULT '0',
207
            UNIQUE (package_id),
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
            FOREIGN KEY(package_id) REFERENCES sources(id))''',
        '''INSERT INTO results (package_id, version, status, build_date)
           SELECT s.id, r.version, r.status, r.build_date
           FROM sources AS s JOIN source_packages as r ON s.name = r.name''',
        'DROP TABLE source_packages',
        # `stats_builds`: (completely new table where we save every build)
        '''CREATE TABLE stats_build
           (id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            version TEXT NOT NULL,
            suite TEXT NOT NULL,
            architecture TEXT NOT NULL,
            status TEXT NOT NULL,
            build_date TEXT NOT NULL,
            build_duration TEXT NOT NULL,
            UNIQUE (name, version, suite, architecture, build_date))''',
224
    ],
225
    3: [  # add columns to stats_bugs for new usertag umask
226 227
        '''ALTER TABLE stats_bugs ADD COLUMN open_umask INTEGER''',
        '''ALTER TABLE stats_bugs ADD COLUMN done_umask INTEGER''',
228
    ],
229
    4: [  # stats_pkg_state needs (datum, suite) as primary key
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
        '''CREATE TABLE stats_pkg_state_tmp
           (datum TEXT NOT NULL,
            suite TEXT NOT NULL,
            untested INTEGER,
            reproducible INTEGER,
            unreproducible INTEGER,
            FTBFS INTEGER,
            other INTEGER,
            PRIMARY KEY (datum, suite))''',
        '''INSERT INTO stats_pkg_state_tmp (datum, suite, untested,
            reproducible, unreproducible, FTBFS, other)
            SELECT datum, suite, untested, reproducible, unreproducible,
            FTBFS, other FROM stats_pkg_state;''',
        '''DROP TABLE stats_pkg_state;''',
        '''ALTER TABLE stats_pkg_state_tmp RENAME TO stats_pkg_state;''',
245
    ],
246
    5: [  # stats_builds_per_day needs (datum, suite) as primary key
247 248 249 250 251 252 253 254 255 256 257 258 259 260
        '''CREATE TABLE stats_builds_per_day_tmp
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      reproducible INTEGER,
                      unreproducible INTEGER,
                      FTBFS INTEGER,
                      other INTEGER,
                      PRIMARY KEY (datum, suite))''',
        '''INSERT INTO stats_builds_per_day_tmp (datum, suite,
            reproducible, unreproducible, FTBFS, other)
            SELECT datum, suite, reproducible, unreproducible,
            FTBFS, other FROM stats_builds_per_day;''',
        '''DROP TABLE stats_builds_per_day;''',
        '''ALTER TABLE stats_builds_per_day_tmp RENAME TO stats_builds_per_day;''',
261
    ],
262
    6: [  # stats_builds_age needs (datum, suite) as primary key
263 264 265 266 267 268 269 270 271 272
        '''CREATE TABLE stats_builds_age_tmp
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      oldest_reproducible REAL,
                      oldest_unreproducible REAL,
                      oldest_FTBFS REAL,
                      PRIMARY KEY (datum, suite))''',
        '''INSERT INTO stats_builds_age_tmp (datum, suite,
            oldest_reproducible, oldest_unreproducible, oldest_FTBFS)
            SELECT datum, suite, oldest_reproducible, oldest_unreproducible,
Holger Levsen's avatar
Holger Levsen committed
273
            oldest_FTBFS FROM stats_builds_age;''',
274 275
        '''DROP TABLE stats_builds_age;''',
        '''ALTER TABLE stats_builds_age_tmp RENAME TO stats_builds_age;''',
276
    ],
277
    7: [  # change build_duration field in results and stats_build from str to int
278 279 280 281 282 283 284 285 286 287
        '''CREATE TABLE stats_build_tmp
           (id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            version TEXT NOT NULL,
            suite TEXT NOT NULL,
            architecture TEXT NOT NULL,
            status TEXT NOT NULL,
            build_date TEXT NOT NULL,
            build_duration INTEGER NOT NULL,
            UNIQUE (name, version, suite, architecture, build_date))''',
288 289 290
        '''INSERT INTO stats_build_tmp
            SELECT id, name, version, suite, architecture, status, build_date,
            CAST (build_duration AS INTEGER) FROM stats_build''',
291 292 293 294 295 296 297 298 299
        'DROP TABLE stats_build',
        'ALTER TABLE stats_build_tmp RENAME TO stats_build',
        '''CREATE TABLE results_tmp
           (id INTEGER PRIMARY KEY,
            package_id INTEGER NOT NULL,
            version TEXT NOT NULL,
            status TEXT,
            build_date TEXT,
            build_duration INTEGER DEFAULT '0',
300
            UNIQUE (package_id),
301
            FOREIGN KEY(package_id) REFERENCES sources(id))''',
302 303 304
        '''INSERT INTO results_tmp
            SELECT id, package_id, version, status,
            build_date, CAST (build_duration AS INTEGER) FROM results''',
305 306
        'DROP TABLE results',
        'ALTER TABLE results_tmp RENAME TO results',
307
    ],
308
    8: [  # add default value to stats_bugs to get a full 'done vs open bugs' graph
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
        '''CREATE TABLE stats_bugs_tmp
           (datum TEXT NOT NULL,
            open_toolchain INTEGER DEFAULT '0',
            done_toolchain INTEGER DEFAULT '0',
            open_infrastructure INTEGER DEFAULT '0',
            done_infrastructure INTEGER DEFAULT '0',
            open_timestamps INTEGER DEFAULT '0',
            done_timestamps INTEGER DEFAULT '0',
            open_fileordering INTEGER DEFAULT '0',
            done_fileordering INTEGER DEFAULT '0',
            open_buildpath INTEGER DEFAULT '0',
            done_buildpath INTEGER DEFAULT '0',
            open_username INTEGER DEFAULT '0',
            done_username INTEGER DEFAULT '0',
            open_hostname INTEGER DEFAULT '0',
            done_hostname INTEGER DEFAULT '0',
            open_uname INTEGER DEFAULT '0',
            done_uname INTEGER DEFAULT '0',
            open_randomness INTEGER DEFAULT '0',
            done_randomness INTEGER DEFAULT '0',
            open_buildinfo INTEGER DEFAULT '0',
            done_buildinfo INTEGER DEFAULT '0',
            open_cpu INTEGER DEFAULT '0',
            done_cpu INTEGER DEFAULT '0',
            open_signatures INTEGER DEFAULT '0',
            done_signatures INTEGER DEFAULT '0',
            open_environment INTEGER DEFAULT '0',
Holger Levsen's avatar
Holger Levsen committed
336
            done_environment INTEGER DEFAULT '0',
337 338 339 340 341 342
            open_umask INTEGER DEFAULT '0',
            done_umask INTEGER DEFAULT '0',
            PRIMARY KEY (datum))''',
        'INSERT INTO stats_bugs_tmp SELECT * FROM stats_bugs',
        'DROP TABLE stats_bugs',
        'ALTER TABLE stats_bugs_tmp RENAME TO stats_bugs',
343
    ],
344
    9: [  # rename 'sid' to 'unstable'
345 346 347 348 349 350
        "UPDATE sources SET suite = 'unstable' WHERE suite = 'sid'",
        "UPDATE stats_build SET suite = 'unstable' WHERE suite = 'sid'",
        "UPDATE stats_pkg_state SET suite = 'unstable' WHERE suite = 'sid'",
        "UPDATE stats_builds_per_day SET suite = 'unstable' WHERE suite = 'sid'",
        "UPDATE stats_builds_age SET suite = 'unstable' WHERE suite = 'sid'",
        "UPDATE stats_meta_pkg_state SET suite = 'unstable' WHERE suite = 'sid'",
351
    ],
352
    10: [  # add the notes and issues tables
353 354 355 356 357 358 359 360 361 362 363 364 365
        '''CREATE TABLE notes (
            package_id INTEGER,
            version TEXT NOT NULL,
            issues TEXT,
            bugs TEXT,
            comments TEXT,
            PRIMARY KEY (package_id),
            FOREIGN KEY(package_id) REFERENCES sources(id))''',
        '''CREATE TABLE issues (
            name TEXT NOT NULL,
            description TEXT NOT NULL,
            url TEXT,
            PRIMARY KEY (name))''',
366
    ],
367
    11: [  # table with removed packages, to enable the maintenance job to do clean up
368 369 370 371 372
        '''CREATE TABLE removed_packages (
            name TEXT NOT NULL,
            suite TEXT NOT NULL,
            architecture TEXT NOT NULL,
            PRIMARY KEY (name, suite, architecture))''',
373
    ],
374 375
    12: [  # refactor the artifacts handling, splitting artifacts saving from
           # IRC notification
376
        'ALTER TABLE schedule ADD COLUMN notify TEXT',
377
    ],
378
    13: [  # record manual scheduling done, to be able to limit people
379 380 381 382
        '''CREATE TABLE manual_scheduler (
            id INTEGER PRIMARY KEY,
            package_id INTEGER NOT NULL,
            requester TEXT NOT NULL,
383 384
            date_request INTEGER NOT NULL)''',
        'ALTER TABLE schedule ADD COLUMN scheduler TEXT',
385
    ],
386
    14: [  # column to enable mail notification to maintainers
387
        'ALTER TABLE sources ADD COLUMN notify_maintainer INTEGER NOT NULL DEFAULT 0',
388
    ],
389
    15: [  # add columns to stats_bugs for new usertag ftbfs
390 391
        '''ALTER TABLE stats_bugs ADD COLUMN open_ftbfs INTEGER''',
        '''ALTER TABLE stats_bugs ADD COLUMN done_ftbfs INTEGER''',
392
    ],
393
    16: [  # add default value to stats_bugs.(open|done)_ftbfs to get a full 'done vs open bugs' graph
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
        '''CREATE TABLE stats_bugs_tmp
           (datum TEXT NOT NULL,
            open_toolchain INTEGER DEFAULT '0',
            done_toolchain INTEGER DEFAULT '0',
            open_infrastructure INTEGER DEFAULT '0',
            done_infrastructure INTEGER DEFAULT '0',
            open_timestamps INTEGER DEFAULT '0',
            done_timestamps INTEGER DEFAULT '0',
            open_fileordering INTEGER DEFAULT '0',
            done_fileordering INTEGER DEFAULT '0',
            open_buildpath INTEGER DEFAULT '0',
            done_buildpath INTEGER DEFAULT '0',
            open_username INTEGER DEFAULT '0',
            done_username INTEGER DEFAULT '0',
            open_hostname INTEGER DEFAULT '0',
            done_hostname INTEGER DEFAULT '0',
            open_uname INTEGER DEFAULT '0',
            done_uname INTEGER DEFAULT '0',
            open_randomness INTEGER DEFAULT '0',
            done_randomness INTEGER DEFAULT '0',
            open_buildinfo INTEGER DEFAULT '0',
            done_buildinfo INTEGER DEFAULT '0',
            open_cpu INTEGER DEFAULT '0',
            done_cpu INTEGER DEFAULT '0',
            open_signatures INTEGER DEFAULT '0',
            done_signatures INTEGER DEFAULT '0',
            open_environment INTEGER DEFAULT '0',
            done_environment INTEGER DEFAULT '0',
            open_umask INTEGER DEFAULT '0',
            done_umask INTEGER DEFAULT '0',
            open_ftbfs INTEGER DEFAULT '0',
            done_ftbfs INTEGER DEFAULT '0',
            PRIMARY KEY (datum))''',
        'INSERT INTO stats_bugs_tmp SELECT * FROM stats_bugs',
        'DROP TABLE stats_bugs',
        'ALTER TABLE stats_bugs_tmp RENAME TO stats_bugs',
430
    ],
431
    17: [  # add column to save which builders builds what
432 433 434
        "ALTER TABLE schedule ADD COLUMN builder TEXT",
        "ALTER TABLE results ADD COLUMN builder TEXT NOT NULL DEFAULT ''",
        "ALTER TABLE stats_build ADD COLUMN builder TEXT NOT NULL DEFAULT ''",
435
    ],
436
    18: [  # add columns to stats_bugs for new usertag locale
437 438
        '''ALTER TABLE stats_bugs ADD COLUMN open_locale INTEGER DEFAULT 0''',
        '''ALTER TABLE stats_bugs ADD COLUMN done_locale INTEGER DEFAULT 0''',
439
    ],
440 441
    19: [  # add column architecture to stats_pkg_state, stats_builds_per_day
           # and stats_builds_age tables and set previous values to amd64
442 443 444
        "ALTER TABLE stats_pkg_state ADD COLUMN architecture TEXT NOT NULL DEFAULT 'amd64'",
        "ALTER TABLE stats_builds_per_day ADD COLUMN architecture TEXT NOT NULL DEFAULT 'amd64'",
        "ALTER TABLE stats_builds_age ADD COLUMN architecture TEXT NOT NULL DEFAULT 'amd64'",
445
    ],
446
    20: [  # use (datum, suite, architecture) as primary key for stats_pkg_state
447 448 449 450 451 452 453 454 455 456
        '''CREATE TABLE stats_pkg_state_tmp
           (datum TEXT NOT NULL,
            suite TEXT NOT NULL,
            architecture TEXT NOT NULL,
            untested INTEGER,
            reproducible INTEGER,
            unreproducible INTEGER,
            FTBFS INTEGER,
            other INTEGER,
            PRIMARY KEY (datum, suite, architecture))''',
457
        '''INSERT INTO stats_pkg_state_tmp (datum, suite, architecture, untested,
458
            reproducible, unreproducible, FTBFS, other)
459
            SELECT datum, suite, architecture, untested, reproducible, unreproducible,
460 461 462
            FTBFS, other FROM stats_pkg_state;''',
        '''DROP TABLE stats_pkg_state;''',
        '''ALTER TABLE stats_pkg_state_tmp RENAME TO stats_pkg_state;''',
463
    ],
464
    21: [  # use (datum, suite, architecture) as primary key for stats_builds_per_day
465 466 467 468 469 470 471 472 473
        '''CREATE TABLE stats_builds_per_day_tmp
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      architecture TEXT NOT NULL,
                      reproducible INTEGER,
                      unreproducible INTEGER,
                      FTBFS INTEGER,
                      other INTEGER,
                      PRIMARY KEY (datum, suite, architecture))''',
474
        '''INSERT INTO stats_builds_per_day_tmp (datum, suite, architecture,
475
            reproducible, unreproducible, FTBFS, other)
476
            SELECT datum, suite, architecture, reproducible, unreproducible,
477 478 479
            FTBFS, other FROM stats_builds_per_day;''',
        '''DROP TABLE stats_builds_per_day;''',
        '''ALTER TABLE stats_builds_per_day_tmp RENAME TO stats_builds_per_day;''',
480
    ],
481
    22: [  # use (datum, suite, architecture) as primary key for stats_builds_age
482 483 484 485 486 487 488 489
        '''CREATE TABLE stats_builds_age_tmp
                     (datum TEXT NOT NULL,
                      suite TEXT NOT NULL,
                      architecture TEXT NOT NULL,
                      oldest_reproducible REAL,
                      oldest_unreproducible REAL,
                      oldest_FTBFS REAL,
                      PRIMARY KEY (datum, suite, architecture))''',
490
        '''INSERT INTO stats_builds_age_tmp (datum, suite, architecture,
491
            oldest_reproducible, oldest_unreproducible, oldest_FTBFS)
492
            SELECT datum, suite, architecture, oldest_reproducible, oldest_unreproducible,
493 494 495
            oldest_FTBFS FROM stats_builds_age;''',
        '''DROP TABLE stats_builds_age;''',
        '''ALTER TABLE stats_builds_age_tmp RENAME TO stats_builds_age;''',
496
    ],
497 498
    23: [  # save which builders built a package and change the name of the
           # field keeping the job name
499 500 501 502 503 504 505 506 507
        '''CREATE TABLE stats_build_tmp
            (id INTEGER PRIMARY KEY,
             name TEXT NOT NULL,
             version TEXT NOT NULL,
             suite TEXT NOT NULL,
             architecture TEXT NOT NULL,
             status TEXT NOT NULL,
             build_date TEXT NOT NULL,
             build_duration TEXT NOT NULL,
508 509
             node1 TEXT NOT NULL DEFAULT '',
             node2 TEXT NOT NULL DEFAULT '',
510 511 512 513 514 515 516 517
             job TEXT NOT NULL,
             UNIQUE (name, version, suite, architecture, build_date))''',
        '''INSERT INTO stats_build_tmp (id, name, version, suite, architecture,
                    status, build_date, build_duration, job)
           SELECT id, name, version, suite, architecture, status, build_date,
                    build_duration, builder FROM stats_build''',
        'DROP TABLE stats_build',
        'ALTER TABLE stats_build_tmp RENAME TO stats_build',
518
    ],
519
    24: [  # the same as #23 but for the results table
520 521 522 523 524 525 526 527 528 529
        '''CREATE TABLE results_tmp
           (id INTEGER PRIMARY KEY,
            package_id INTEGER NOT NULL,
            version TEXT NOT NULL,
            status TEXT NOT NULL,
            build_date TEXT NOT NULL,
            build_duration INTEGER DEFAULT 0,
            node1 TEXT,
            node2 TEXT,
            job TEXT NOT NULL,
530
            UNIQUE (package_id),
531 532 533 534 535 536 537
            FOREIGN KEY(package_id) REFERENCES sources(id))''',
        '''INSERT INTO results_tmp (id, package_id, version, status,
                    build_date, build_duration, job)
           SELECT id, package_id, version, status, build_date, build_duration,
                    builder FROM results''',
        'DROP TABLE results',
        'ALTER TABLE results_tmp RENAME TO results',
538
    ],
539
    25: [  # rename the builder column also in the schedule table.
540 541 542 543 544 545 546 547 548 549 550
        '''CREATE TABLE schedule_tmp
           (id INTEGER PRIMARY KEY,
            package_id INTEGER NOT NULL,
            date_scheduled TEXT NOT NULL,
            scheduler TEXT,
            date_build_started TEXT,
            job TEXT,
            notify TEXT NOT NULL DEFAULT '',
            save_artifacts INTEGER DEFAULT 0,
            UNIQUE (package_id),
            FOREIGN KEY(package_id) REFERENCES sources(id))''',
551
        '''UPDATE schedule SET notify = '' WHERE notify IS NULL''',
552 553 554 555 556 557 558
        '''INSERT INTO schedule_tmp (id, package_id, date_scheduled, scheduler,
                    date_build_started, job, notify, save_artifacts)
           SELECT id, package_id, date_scheduled, scheduler,
                    date_build_started, builder, notify, save_artifacts
           FROM schedule''',
        'DROP TABLE schedule',
        'ALTER TABLE schedule_tmp RENAME TO schedule',
559
    ],
560
    26: [  # add a column to the schedule table to save the schedule message
561 562
        "ALTER TABLE schedule ADD COLUMN message TEXT",
        "ALTER TABLE stats_build ADD COLUMN schedule_message TEXT NOT NULL DEFAULT ''",
563
    ],
564
    27: [  # add column architecture to stats_meta_pkg_state and set previous values to amd64
565
        "ALTER TABLE stats_meta_pkg_state ADD COLUMN architecture TEXT NOT NULL DEFAULT 'amd64'",
566
    ],
567
    28: [  # use (datum, suite, architecture, meta_pkg) as primary key for stats_meta_pkg_state
568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
        '''CREATE TABLE stats_meta_pkg_state_tmp
           (datum TEXT NOT NULL,
            suite TEXT NOT NULL,
            architecture TEXT NOT NULL,
            meta_pkg TEXT NOT NULL,
            reproducible INTEGER,
            unreproducible INTEGER,
            FTBFS INTEGER,
            other INTEGER,
            PRIMARY KEY (datum, suite, architecture, meta_pkg))''',
        '''INSERT INTO stats_meta_pkg_state_tmp (datum, suite, architecture, meta_pkg,
            reproducible, unreproducible, FTBFS, other)
            SELECT datum, suite, architecture, meta_pkg, reproducible, unreproducible,
            FTBFS, other FROM stats_meta_pkg_state;''',
        '''DROP TABLE stats_meta_pkg_state;''',
        '''ALTER TABLE stats_meta_pkg_state_tmp RENAME TO stats_meta_pkg_state;''',
584
    ],
585

586
    # THE FOLLOWING UPDATES CAN ONLY BE PREFORMED ON POSTGRES DATABASE
587

588
    29: [  # Add auto incrementing to the id columns of some tables
589 590 591 592 593 594 595 596 597 598 599 600 601
        "CREATE SEQUENCE schedule_id_seq",
        "ALTER TABLE schedule ALTER id SET DEFAULT NEXTVAL('schedule_id_seq')",
        "CREATE SEQUENCE manual_scheduler_id_seq",
        """ALTER TABLE manual_scheduler ALTER id SET DEFAULT
            NEXTVAL('manual_scheduler_id_seq')""",
        "CREATE SEQUENCE sources_id_seq",
        "ALTER TABLE sources ALTER id SET DEFAULT NEXTVAL('sources_id_seq')",
        "CREATE SEQUENCE stats_build_id_seq",
        """ALTER TABLE stats_build ALTER id SET DEFAULT
            NEXTVAL('stats_build_id_seq')""",
        "CREATE SEQUENCE results_id_seq",
        "ALTER TABLE results ALTER id SET DEFAULT NEXTVAL('results_id_seq')",
    ],
602
    30: [  # Add new table to track diffoscope breake
603 604
        '''CREATE TABLE stats_breakages
                     (datum TEXT,
605 606
                      diffoscope_timeouts INTEGER,
                      diffoscope_crashes INTEGER,
607
                      PRIMARY KEY (datum))''',
608
    ],
609
    31: [  # rename the 'testing' suite into 'stretch'
610
        "UPDATE {} SET suite='stretch' WHERE suite='testing'".format(t)
611 612
        for t in ("sources", "stats_pkg_state", "stats_builds_per_day",
                  "stats_builds_age", "stats_meta_pkg_state", "stats_build")
613
    ],
614
    32: [  # copy stretch packages (includng results) in buster
615 616 617
        """INSERT INTO sources (name, version, suite, architecture, notify_maintainer)
            SELECT name, version, 'buster', architecture, notify_maintainer
            FROM sources
618
            WHERE suite = 'stretch'""",
619 620 621 622 623 624 625 626 627 628 629 630 631 632
        """WITH buster AS (
                SELECT id, name, suite, architecture, version
                FROM sources WHERE suite = 'buster'),
            sr AS (
                SELECT s.name, s.architecture, r.id, r.version, r.status,
                    r.build_date, r.build_duration, r.node1, r.node2, r.job
                FROM sources AS s JOIN results AS r ON s.id=r.package_id
                WHERE s.suite = 'stretch')
            INSERT INTO results (package_id, version, status, build_date,
                    build_duration, node1, node2, job)
                SELECT b.id, sr.version, sr.status, sr.build_date,
                    sr.build_duration, sr.node1, sr.node2, sr.job
                FROM buster AS b JOIN sr ON b.name=sr.name
                    AND b.architecture=sr.architecture""",
633
    ],
634
    33: [  # the message columns.  They are not actually needed.
635 636
        "ALTER TABLE schedule DROP COLUMN message",
        "ALTER TABLE stats_build DROP COLUMN schedule_message",
637
    ],
638
    34: [  # rename status "not for us" to "NFU"
639 640 641
        "UPDATE results SET status='NFU' WHERE status='not for us'",
        "UPDATE stats_build SET status='NFU' WHERE status='not for us'",
    ],
642
    35: [  # rename status "unreproducible" to "FTBR"
643 644 645 646 647 648
        "UPDATE results SET status='FTBR' WHERE status='unreproducible'",
        "UPDATE stats_build SET status='FTBR' WHERE status='unreproducible'",
        "ALTER TABLE stats_pkg_state RENAME COLUMN unreproducible to FTBR",
        "ALTER TABLE stats_meta_pkg_state RENAME COLUMN unreproducible to FTBR",
        "ALTER TABLE stats_builds_per_day RENAME COLUMN unreproducible to FTBR",
        "ALTER TABLE stats_builds_age " + \
649
        "RENAME COLUMN oldest_unreproducible to oldest_FTBR",
650
    ],
651
    36: [  # rename status "404" to "E404"
652 653 654
        "UPDATE results SET status='E404' WHERE status='404'",
        "UPDATE stats_build SET status='E404' WHERE status='404'",
    ],
655 656 657
    37: [  # change the data type in the stats_build.build_date column
        "ALTER TABLE stats_build ALTER COLUMN build_date SET DATA TYPE timestamp"
        " USING build_date::timestamp"
658 659 660 661 662 663 664 665 666 667 668 669 670
    ],
    38: [  # add a distribution field to the sources tables
        """CREATE TABLE distributions (
                id SERIAL PRIMARY KEY,
                name VARCHAR)""",
        "INSERT INTO distributions (name) VALUES ('debian')",
        """ALTER TABLE sources
            ADD COLUMN distribution INTEGER DEFAULT 1
            REFERENCES distributions(id)""",
        """ALTER TABLE stats_build
            ADD COLUMN distribution INTEGER DEFAULT 1
            REFERENCES distributions(id)""",
    ],
671 672 673 674
    39: [  # fix build_duration datatype
        "ALTER TABLE stats_build ALTER COLUMN build_duration SET DATA TYPE integer"
        " USING build_duration::integer"
    ],
675 676 677 678
    40: [  # add some new distributions
        "INSERT INTO distributions (name) VALUES "
        "('opensuse'), ('archlinux'), ('alpine')"
    ],
679 680 681 682 683 684 685 686
    41: [  # mark archlinux packages as archlinx packages
        """UPDATE sources SET distribution=(
            SELECT id FROM distributions WHERE name='archlinux')
           WHERE suite LIKE 'archlinux_%%'""",
        """UPDATE stats_build SET distribution=(
            SELECT id FROM distributions WHERE name='archlinux')
           WHERE suite LIKE 'archlinux_%%'"""
    ],
687 688 689
}


690 691 692 693 694 695 696 697
def table_exists(tablename):
    DB_METADATA.reflect()
    if tablename in DB_METADATA.tables:
        return True
    else:
        return False


698 699
def db_create_tables():
    """
700
    Check whether all tables are present, and create them if not.
701 702 703
    The check is done against sqlite_master, a reserved sqlite table
    containing all database metadata.
    """
704
    changed = False
705
    for table in db_schema:
706
        if not table_exists(table['name']):
707 708 709 710
            log.warning(table['name'] + ' does not exists. Creating...')
            for query in table['query']:
                log.info('\t' + re.sub(' +', ' ', query.replace('\n', ' ')))
                query_db(query)
711 712
                changed = True
    return changed
713 714 715 716 717


def db_update():
    """
    Update the database schema.
718
    Get a list of queries to perform from schema_updates.
719
    The need for an update is detected by checking the biggest value in the
720
    rb_schema table against the biggest value in the schema_updates dictionary.
721 722 723 724 725 726 727 728
    """
    current = query_db('SELECT MAX(version) FROM rb_schema')[0][0]
    if not current:
        log.warning('This is probably a new database, there are no ' +
                    'previous updates noted')
        current = 0
    last = max(schema_updates.keys())
    if current == last:
729
        return False
730
    if current > last:
731 732
        print_critical_message('The active database schema is higher than' +
                               '  the last update available.\nPlease check!')
733 734
        sys.exit(1)
    log.info('Found schema updates.')
735 736
    Session = sessionmaker(bind=DB_ENGINE, autocommit=True)
    session = Session()
737 738
    for update in range(current+1, last+1):
        log.info('Applying database update #' + str(update) + '. Queries:')
739
        startTime = datetime.now()
740 741 742 743
        with session.begin():
            for query in schema_updates[update]:
                log.info('\t' + query)
                session.execute(query)
744 745 746 747
            session.execute(
                "INSERT INTO rb_schema (version, date) "
                "VALUES (:ver, CURRENT_TIMESTAMP)", {'ver': update}
            )
748
        log.info(str(len(schema_updates[update])) + ' queries executed in ' +
749
                 str(datetime.now() - startTime))
750
    return True
751 752 753


if __name__ == '__main__':
754
    changed_created = False
755
    if table_exists('rb_schema'):
756
        if not query_db('SELECT * FROM rb_schema'):
757
            # table exists but there is nothing in it
758
            changed_create = db_create_tables()
759
    else:
760 761
        log.error('There is no rb_schema table in the database.')
        log.error('Will run a full db_create_tables().')
762 763 764
        changed_created = db_create_tables()
    changed = db_update()
    if changed or changed_created:
765 766
        log.info('Total execution time: ' + str(datetime.now() -
                 datetime.strptime(now, "%Y-%m-%d-%H-%M-%S")))
767 768
    else:
        log.info('No pending updates.')