Unverified Commit b1b64d98 authored by Valerie R Young, committed by Holger Levsen
Browse files

reproducible debian: switch python database backend to SQLAlchemy



Signed-off-by: Holger Levsen <holger@layer-acht.org>
parent da31c830
...@@ -29,6 +29,7 @@ from traceback import print_exception ...@@ -29,6 +29,7 @@ from traceback import print_exception
from subprocess import call, check_call from subprocess import call, check_call
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from datetime import datetime, timedelta from datetime import datetime, timedelta
from sqlalchemy import MetaData, Table, sql, create_engine
DEBUG = False DEBUG = False
QUIET = False QUIET = False
...@@ -92,6 +93,11 @@ with open(os.path.join(BIN_PATH, './meta_pkgset.csv'), newline='') as f: ...@@ -92,6 +93,11 @@ with open(os.path.join(BIN_PATH, './meta_pkgset.csv'), newline='') as f:
for line in csv.reader(f): for line in csv.reader(f):
META_PKGSET[int(line[0])] = (line[2], line[1]) META_PKGSET[int(line[0])] = (line[2], line[1])
# init the database data and connection
DB_ENGINE = create_engine("sqlite:///" + REPRODUCIBLE_DB)
DB_METADATA = MetaData(DB_ENGINE) # Get all table definitions
conn_db = DB_ENGINE.connect() # the local sqlite3 reproducible db
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group() group = parser.add_mutually_exclusive_group()
group.add_argument("-d", "--debug", action="store_true") group.add_argument("-d", "--debug", action="store_true")
...@@ -349,18 +355,46 @@ def write_html_page(title, body, destfile, no_header=False, style_note=False, ...@@ -349,18 +355,46 @@ def write_html_page(title, body, destfile, no_header=False, style_note=False,
with open(destfile, 'w', encoding='UTF-8') as fd: with open(destfile, 'w', encoding='UTF-8') as fd:
fd.write(html) fd.write(html)
def start_db_connection():
return sqlite3.connect(REPRODUCIBLE_DB, timeout=60) def db_table(table_name):
"""Returns a SQLAlchemy Table objects to be used in queries
using SQLAlchemy's Expressive Language.
Arguments:
    table_name: a string corresponding to an existing table name
"""
try:
return Table(table_name, DB_METADATA, autoload=True)
except sqlalchemy.exc.NoSuchTableError:
log.error("Table %s does not exist or schema for %s could not be loaded",
table_name, REPRODUCIBLE_DB)
raise
def query_db(query): def query_db(query):
    cursor = conn_db.cursor() """Executes a raw SQL query. Return depends on query type.
Returns:
select:
list of tuples
update or delete:
the number of rows affected
insert:
None
"""
try: try:
cursor.execute(query) result = conn_db.execute(query)
except: except:
print_critical_message('Error executing this query:\n' + query) print_critical_message('Error executing this query:\n' + query)
raise raise
conn_db.commit()
return cursor.fetchall() if result.returns_rows:
return result.fetchall()
elif result.supports_sane_rowcount() and result.rowcount > -1:
return result.rowcount
else:
return None
def start_udd_connection(): def start_udd_connection():
username = "public-udd-mirror" username = "public-udd-mirror"
...@@ -797,8 +831,6 @@ class Package: ...@@ -797,8 +831,6 @@ class Package:
return False return False
# init the databases connections
conn_db = start_db_connection() # the local sqlite3 reproducible db
# get_bugs() is the only user of this, let it initialize the connection itself, # get_bugs() is the only user of this, let it initialize the connection itself,
# during it's first call to speed up things when unneeded # during it's first call to speed up things when unneeded
# also "share" the bugs, to avoid collecting them multiple times per run # also "share" the bugs, to avoid collecting them multiple times per run
......
...@@ -8,12 +8,14 @@ ...@@ -8,12 +8,14 @@
# #
# Import the content of the notes.git repository into the reproducible database # Import the content of the notes.git repository into the reproducible database
from reproducible_common import *
import os
import apt import apt
import json
import yaml import yaml
import json
from sqlalchemy import sql
from apt_pkg import version_compare from apt_pkg import version_compare
from reproducible_common import *
import os
NOTES = 'packages.yml' NOTES = 'packages.yml'
ISSUES = 'issues.yml' ISSUES = 'issues.yml'
...@@ -100,53 +102,74 @@ def load_issues(): ...@@ -100,53 +102,74 @@ def load_issues():
def store_issues(): def store_issues():
query = 'REPLACE INTO issues (name, url, description) ' + \ issues_table = db_table('issues')
'VALUES (?, ?, ?)' # Get existing issues
cursor = conn_db.cursor() results = conn_db.execute(sql.select([issues_table.c.name]))
to_add = [] existing_issues = set([row[0] for row in results])
for issue in sorted(issues): to_insert = []
name = issue to_update = []
for name in issues:
url = issues[name]['url'] if 'url' in issues[name] else '' url = issues[name]['url'] if 'url' in issues[name] else ''
desc = issues[name]['description'] desc = issues[name]['description']
to_add.append((name, url, desc)) if name in existing_issues:
cursor.executemany(query, to_add) to_update.append({
conn_db.commit() 'issuename': name,
log.debug('Issues saved in the database') 'url': url,
'description': desc
})
# remove this package from the set, to know who to delete later
existing_issues.remove(name)
else:
to_insert.append({
'name': name,
'url': url,
'description': desc
})
if to_update:
update_query = issues_table.update().\
where(issues_table.c.name == sql.bindparam('issuename'))
conn_db.execute(update_query, to_update)
log.debug('Issues updated in the database')
if to_insert:
conn_db.execute(issues_table.insert(), to_insert)
log.debug('Issues added to the database')
def drop_old_issues(): # if there are any existing issues left, delete them.
old = [x[0] for x in query_db('SELECT name FROM issues')] if existing_issues:
to_drop = [x for x in old if x not in issues] to_delete = [{'issuename': name} for name in existing_issues]
if to_drop: delete_query = issues_table.delete().\
log.info("I'm about to remove the following issues: " + str(to_drop)) where(issues_table.c.name == sql.bindparam('issuename'))
for issue in to_drop: conn_db.execute(delete_query, to_delete)
query_db('DELETE FROM issues WHERE name="{}"'.format(issue)) log.info("Removed the following issues: " + str(existing_issues))
def store_notes(): def store_notes():
log.debug('Removing all notes') log.debug('Removing all notes')
query_db('DELETE FROM notes') notes_table = db_table('notes')
query = 'REPLACE INTO notes ' + \ conn_db.execute(notes_table.delete())
'(package_id, version, issues, bugs, comments) ' + \ to_insert = []
'VALUES (?, ?, ?, ?, ?)'
to_add = []
for entry in [x for y in sorted(notes) for x in notes[y]]: for entry in [x for y in sorted(notes) for x in notes[y]]:
pkg_id = entry['id'] pkg_id = entry['id']
pkg_version = entry['version'] pkg_version = entry['version']
pkg_issues = json.dumps(entry['issues']) pkg_issues = json.dumps(entry['issues'])
pkg_bugs = json.dumps(entry['bugs']) pkg_bugs = json.dumps(entry['bugs'])
pkg_comments = entry['comments'] pkg_comments = entry['comments']
pkg = (pkg_id, pkg_version, pkg_issues, pkg_bugs, pkg_comments) to_insert.append({
to_add.append(pkg) 'id': pkg_id,
cursor = conn_db.cursor() 'version': pkg_version,
cursor.executemany(query, to_add) 'issues': pkg_issues,
conn_db.commit() 'bugs': pkg_bugs,
log.info('Saved ' + str(len(to_add)) + ' notes in the database') 'comments': pkg_comments
})
if (len(to_insert)):
conn_db.execute(notes_table.insert(), to_insert)
log.info('Saved ' + str(len(to_insert)) + ' notes in the database')
if __name__ == '__main__': if __name__ == '__main__':
notes = load_notes() notes = load_notes()
issues = load_issues() issues = load_issues()
store_issues() store_issues()
drop_old_issues()
store_notes() store_notes()
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
import sys import sys
import time import time
import argparse import argparse
from sqlalchemy import sql
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Reschedule packages to re-test their reproducibility', description='Reschedule packages to re-test their reproducibility',
...@@ -229,7 +229,8 @@ if amount + len(ids) > 200 and not local: ...@@ -229,7 +229,8 @@ if amount + len(ids) > 200 and not local:
# do the actual scheduling # do the actual scheduling
to_schedule = [] add_to_schedule = []
update_schedule = []
save_schedule = [] save_schedule = []
artifacts_value = 1 if artifacts else 0 artifacts_value = 1 if artifacts else 0
if notify_on_start: if notify_on_start:
...@@ -238,23 +239,54 @@ elif notify or artifacts: ...@@ -238,23 +239,54 @@ elif notify or artifacts:
do_notify = 1 do_notify = 1
else: else:
do_notify = 0 do_notify = 0
schedule_table = db_table('schedule')
existing_pkg_ids = dict(query_db(sql.select([
schedule_table.c.package_id,
schedule_table.c.id,
]).where(schedule_table.c.package_id.in_(ids))))
for id in ids: for id in ids:
to_schedule.append((id, date, artifacts_value, str(do_notify), requester, if id in existing_pkg_ids:
None)) update_schedule.append({
save_schedule.append((id, requester, epoch)) 'update_id': existing_pkg_ids[id],
log.debug('Packages about to be scheduled: ' + str(to_schedule)) 'package_id': id,
'date_scheduled': date,
'save_artifacts': artifacts_value,
'notify': str(do_notify),
'scheduler': requester,
})
else:
add_to_schedule.append({
'package_id': id,
'date_scheduled': date,
'save_artifacts': artifacts_value,
'notify': str(do_notify),
'scheduler': requester,
})
save_schedule.append({
'package_id': id,
'requester': requester,
'date_request': epoch,
})
log.debug('Packages about to be scheduled: ' + str(add_to_schedule)
+ str(update_schedule))
query1 = '''REPLACE INTO schedule update_schedule_query = schedule_table.update().\
(package_id, date_scheduled, save_artifacts, notify, scheduler, message) where(schedule_table.c.id == sql.bindparam('update_id'))
VALUES (?, ?, ?, ?, ?, ?)''' insert_schedule_query = schedule_table.insert()
query2 = '''INSERT INTO manual_scheduler insert_manual_query = db_table('manual_scheduler').insert()
(package_id, requester, date_request) VALUES (?, ?, ?)'''
if not dry_run: if not dry_run:
cursor = conn_db.cursor() transaction = conn_db.begin()
cursor.executemany(query1, to_schedule) if len(add_to_schedule):
cursor.executemany(query2, save_schedule) conn_db.execute(insert_schedule_query, add_to_schedule)
conn_db.commit() if len(update_schedule):
conn_db.execute(update_schedule_query, update_schedule)
conn_db.execute(insert_manual_query, save_schedule)
transaction.commit()
else: else:
log.info('Ran with --dry-run, scheduled nothing') log.info('Ran with --dry-run, scheduled nothing')
......
...@@ -18,6 +18,7 @@ import random ...@@ -18,6 +18,7 @@ import random
from subprocess import call from subprocess import call
from apt_pkg import version_compare from apt_pkg import version_compare
from urllib.request import urlopen from urllib.request import urlopen
from sqlalchemy import sql
from reproducible_common import * from reproducible_common import *
from reproducible_html_live_status import generate_schedule from reproducible_html_live_status import generate_schedule
...@@ -228,8 +229,8 @@ def update_sources_db(suite, arch, sources): ...@@ -228,8 +229,8 @@ def update_sources_db(suite, arch, sources):
pkgs_to_add = [] pkgs_to_add = []
updated_pkgs = [] updated_pkgs = []
different_pkgs = [x for x in new_pkgs if x not in cur_pkgs] different_pkgs = [x for x in new_pkgs if x not in cur_pkgs]
log.debug('Packages different in the archive and in the db: ' + log.debug('Packages different in the archive and in the db: %s',
str(different_pkgs)) different_pkgs)
for pkg in different_pkgs: for pkg in different_pkgs:
# pkg: (name, version, suite, arch) # pkg: (name, version, suite, arch)
query = 'SELECT id, version, notify_maintainer FROM sources ' + \ query = 'SELECT id, version, notify_maintainer FROM sources ' + \
...@@ -238,7 +239,12 @@ def update_sources_db(suite, arch, sources): ...@@ -238,7 +239,12 @@ def update_sources_db(suite, arch, sources):
try: try:
result = query_db(query)[0] result = query_db(query)[0]
except IndexError: # new package except IndexError: # new package
pkgs_to_add.append(pkg) pkgs_to_add.append({
'name': pkg[0],
'version': pkg[1],
'suite': pkg[2],
'architecture': pkg[3],
})
continue continue
pkg_id = result[0] pkg_id = result[0]
old_version = result[1] old_version = result[1]
...@@ -246,53 +252,71 @@ def update_sources_db(suite, arch, sources): ...@@ -246,53 +252,71 @@ def update_sources_db(suite, arch, sources):
if version_compare(pkg[1], old_version) > 0: if version_compare(pkg[1], old_version) > 0:
log.debug('New version: ' + str(pkg) + ' (we had ' + log.debug('New version: ' + str(pkg) + ' (we had ' +
old_version + ')') old_version + ')')
updated_pkgs.append( updated_pkgs.append({
(pkg_id, pkg[0], pkg[1], pkg[2], pkg[3], notify_maint)) 'update_id': pkg_id,
'name': pkg[0],
'version': pkg[1],
'suite': pkg[2],
'architecture': pkg[3],
'notify_maintainer': notify_maint,
})
# Now actually update the database: # Now actually update the database:
cursor = conn_db.cursor() sources_table = db_table('sources')
# updated packages # updated packages
log.info('Pushing ' + str(len(updated_pkgs)) + log.info('Pushing ' + str(len(updated_pkgs)) +
' updated packages to the database...') ' updated packages to the database...')
cursor.executemany( if updated_pkgs:
'REPLACE INTO sources ' + transaction = conn_db.begin()
'(id, name, version, suite, architecture, notify_maintainer) ' + update_query = sources_table.update().\
'VALUES (?, ?, ?, ?, ?, ?)', where(sources_table.c.id == sql.bindparam('update_id'))
updated_pkgs) conn_db.execute(update_query, updated_pkgs)
conn_db.commit() transaction.commit()
# new packages # new packages
log.info('Now inserting ' + str(len(pkgs_to_add)) + if pkgs_to_add:
' new sources in the database: ' + log.info('Now inserting %i new sources in the database: %s',
str(pkgs_to_add)) len(pkgs_to_add), pkgs_to_add)
cursor.executemany('INSERT INTO sources ' + transaction = conn_db.begin()
'(name, version, suite, architecture) ' + conn_db.execute(sources_table.insert(), pkgs_to_add)
'VALUES (?, ?, ?, ?)', pkgs_to_add) transaction.commit()
conn_db.commit()
# RM'ed packages # RM'ed packages
cur_pkgs_name = [x[0] for x in cur_pkgs] cur_pkgs_name = [x[0] for x in cur_pkgs]
new_pkgs_name = [x[0] for x in new_pkgs] new_pkgs_name = [x[0] for x in new_pkgs]
rmed_pkgs = [x for x in cur_pkgs_name if x not in new_pkgs_name] rmed_pkgs = [x for x in cur_pkgs_name if x not in new_pkgs_name]
log.info('Now deleting ' + str(len(rmed_pkgs)) + log.info('Now deleting %i removed packages: %s', len(rmed_pkgs),
' removed packages: ' + str(rmed_pkgs)) rmed_pkgs)
rmed_pkgs_id = [] rmed_pkgs_id = []
pkgs_to_rm = [] pkgs_to_rm = []
query = 'SELECT id FROM sources WHERE name="{}" AND suite="{}" ' + \ query = 'SELECT id FROM sources WHERE name="{}" AND suite="{}" ' + \
'AND architecture="{}"' 'AND architecture="{}"'
for pkg in rmed_pkgs: for pkg in rmed_pkgs:
result = query_db(query.format(pkg, suite, arch)) result = query_db(query.format(pkg, suite, arch))
rmed_pkgs_id.extend(result) rmed_pkgs_id.append({'deleteid': result[0][0]})
pkgs_to_rm.append((pkg, suite, arch)) pkgs_to_rm.append({'name': pkg, 'suite': suite, 'architecture': arch})
log.debug('removed packages ID: ' + str([str(x[0]) for x in rmed_pkgs_id])) log.debug('removed packages ID: %s',
log.debug('removed packages: ' + str(pkgs_to_rm)) [str(x['deleteid']) for x in rmed_pkgs_id])
cursor.executemany('DELETE FROM sources ' log.debug('removed packages: %s', pkgs_to_rm)
'WHERE id=?', rmed_pkgs_id)
cursor.executemany('DELETE FROM results ' if rmed_pkgs_id:
'WHERE package_id=?', rmed_pkgs_id) transaction = conn_db.begin()
cursor.executemany('DELETE FROM schedule ' results_table = db_table('results')
'WHERE package_id=?', rmed_pkgs_id) schedule_table = db_table('schedule')
cursor.executemany('INSERT INTO removed_packages ' removed_packages_table = db_table('removed_packages')
'(name, suite, architecture) '
'VALUES (?, ?, ?)', pkgs_to_rm) delete_sources_query = sources_table.delete().\
conn_db.commit() where(sources_table.c.id == sql.bindparam('deleteid'))
delete_results_query = results_table.delete().\
where(results_table.c.package_id == sql.bindparam('deleteid'))
delete_schedule_query = schedule_table.delete().\
where(schedule_table.c.package_id == sql.bindparam('deleteid'))
conn_db.execute(delete_sources_query, rmed_pkgs_id)
conn_db.execute(delete_results_query, rmed_pkgs_id)
conn_db.execute(delete_schedule_query, rmed_pkgs_id)
conn_db.execute(removed_packages_table.insert(), pkgs_to_rm)
transaction.commit()
# finally check whether the db has the correct number of packages # finally check whether the db has the correct number of packages
query = 'SELECT count(*) FROM sources WHERE suite="{}" ' + \ query = 'SELECT count(*) FROM sources WHERE suite="{}" ' + \
'AND architecture="{}"' 'AND architecture="{}"'
...@@ -301,14 +325,14 @@ def update_sources_db(suite, arch, sources): ...@@ -301,14 +325,14 @@ def update_sources_db(suite, arch, sources):
if int(pkgs_end[0][0]) != count_new_pkgs: if int(pkgs_end[0][0]) != count_new_pkgs:
print_critical_message('AH! The number of source in the Sources file' + print_critical_message('AH! The number of source in the Sources file' +
' is different than the one in the DB!') ' is different than the one in the DB!')
log.critical('source in the debian archive for the ' + suite + log.critical('source in the debian archive for the %s suite: %s',
' suite:' + str(count_new_pkgs)) suite, str(count_new_pkgs))
log.critical('source in the reproducible db for the ' + suite + log.critical('source in the reproducible db for the %s suite: %s',
' suite:' + str(pkgs_end[0][0])) suite, str(pkgs_end[0][0]))
sys.exit(1) sys.exit(1)
if pkgs_to_add: if pkgs_to_add:
log.info('Building pages for the new packages') log.info('Building pages for the new packages')
gen_packages_html([Package(x[0]) for x in pkgs_to_add], no_clean=True) gen_packages_html([Package(x['name']) for x in pkgs_to_add], no_clean=True)
def print_schedule_result(suite, arch, criteria, packages): def print_schedule_result(suite, arch, criteria, packages):
...@@ -334,14 +358,10 @@ def queue_packages(all_pkgs, packages, date): ...@@ -334,14 +358,10 @@ def queue_packages(all_pkgs, packages, date):
def schedule_packages(packages): def schedule_packages(packages):
pkgs = ((x, packages[x]) for x in packages) pkgs = [{'package_id': x, 'date_scheduled': packages[x]} for x in packages.keys()]
log.debug('IDs about to be scheduled: ' + str(packages.keys())) log.debug('IDs about to be scheduled: %s', packages.keys())
query = 'INSERT INTO schedule ' + \ if pkgs:
'(package_id, date_scheduled) ' + \ conn_db.execute(db_table('schedule').insert(), pkgs)
'VALUES (?, ?)'
cursor = conn_db.cursor()
cursor.executemany(query, pkgs)
conn_db.commit()
def add_up_numbers(packages, arch): def add_up_numbers(packages, arch):
......
...@@ -29,7 +29,6 @@ from reproducible_common import * ...@@ -29,7 +29,6 @@ from reproducible_common import *
from reproducible_html_packages import gen_packages_html from reproducible_html_packages import gen_packages_html
from reproducible_html_indexes import build_page from reproducible_html_indexes import build_page
class bcolors: class bcolors:
BOLD = '\033[1m' BOLD = '\033[1m'
UNDERLINE = '\033[4m' UNDERLINE = '\033[4m'
...@@ -51,14 +50,18 @@ def _good(text): ...@@ -51,14 +50,18 @@ def _good(text):
def process_pkg(package, deactivate): def process_pkg(package, deactivate):
if deactivate: if deactivate:
_good('Deactovating notification for package ' + str(package)) _good('Deactivating notification for package ' + str(package))
flag = 0 flag = 0
else: else:
_good('Activating notification for package ' + str(package)) _good('Activating notification for package ' + str(package))
flag = 1 flag = 1
rows = c.execute(('UPDATE OR FAIL sources SET notify_maintainer="{}" ' +
'WHERE name="{}"').format(flag, package)).rowcount sources_table = db_table('sources')
conn_db.commit() update_query = sources_table.update().\
where(sources_table.c.name == package).\
values(notify_maintainer=flag)
rows = conn_db.execute(update_query).rowcount
if rows == 0: if rows == 0:
log.error(bcolors.FAIL + str(package) + ' does not exists') log.error(bcolors.FAIL + str(package) + ' does not exists')
sys.exit(1) sys.exit(1)
...@@ -86,8 +89,6 @@ if maintainer: ...@@ -86,8 +89,6 @@ if maintainer:
log.info('\t' + ', '.join(pkgs)) log.info('\t' + ', '.join(pkgs))
packages.extend(pkgs) packages.extend(pkgs)
c = conn_db.cursor()
for package in packages: for package in packages:
process_pkg(package, local_args.deactivate) process_pkg(package, local_args.deactivate)
......
...@@ -388,9 +388,10 @@ if [ -f /etc/debian_version ] ; then ...@@ -388,9 +388,10 @@ if [ -f /etc/debian_version ] ; then
poxml poxml
procmail procmail
python3-debian python3-debian
python3-xdg
python3-yaml
python3-pystache python3-pystache
python3-sqlalchemy
python3-xdg
python3-yaml
python-arpy python-arpy
python-hachoir-metadata python-hachoir-metadata
python-imaging python-imaging
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment