reproducible_compare_Debian_sha1sums.sh 5.04 KB
Newer Older
1
#!/bin/bash
2

3 4
# Copyright 2019 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2+
5 6 7
#
# based on an idea by Vagrant Cascadian <vagrant@debian.org>
# see https://lists.reproducible-builds.org/pipermail/rb-general/2018-October/001239.html
8 9 10 11 12 13 14 15 16 17

# DEBUG is assigned before the shared helpers are sourced.
# NOTE(review): presumably common-functions.sh / common_init read it - confirm.
DEBUG=false
. /srv/jenkins/bin/common-functions.sh
# hand all command line arguments of this script to the shared init helper
common_init "$@"

# common code defining db access
. /srv/jenkins/bin/reproducible_common.sh

# from here on abort on the first unhandled failing command (enabled only
# after the two helper files above have been sourced)
set -e

18
# TODOs:
19
# - ${package_file}.sha1output includes ${package_file} in the file name and contents
20
# - run on osuoslXXX ? harder with using db..
21
# - GRAPH
22
# - save results in db
23
# - loop through all packages known in db
24
# - show results in 'normal pages' 
25
# - store date when a package was last reproduced... (and constantly do that...)
26
# - throw away results (if none has been|which have not) signed with a tests.r-b.o key
27
# - json files from buildinfo.d.n are never re-downloaded
28

29 30 31
# Debian suite whose packages are examined.
RELEASE=buster
# Operating mode taken from the first command line argument. The known
# data gathering modes are random, reverse and forward; any other value
# (including no argument at all) is mapped to "results" further down.
MODE="$1"

# Pick the banner text with the same classification that is applied to
# $MODE later on, so that an empty or unknown mode (which ends up being a
# "results" run) is not announced as a data gathering run.
case "$MODE" in
	random|reverse|forward)
		banner_note='this job gathers data but does not show results.' ;;
	*)
		banner_note='this job will show results based on data gathered in other jobs.' ;;
esac

echo
echo
echo -n 'this is an early prototype...'
echo "$banner_note"
echo
echo
42

43
# buildinfo.debian.net endpoint; the sha1sum of a .deb is appended to it
bdn_url="https://buildinfo.debian.net/api/v1/buildinfos/checksums/sha1"
# the output of the main loop is tee'd into this file and parsed by cleanup_all
log=$(mktemp "--tmpdir=$TMPDIR" sha1-log-XXXXXXX)

# persistent storage for the .sha1output/.json files (and the lock files)
SHA1DIR=/srv/reproducible-results/debian-sha1
mkdir -p "$SHA1DIR"

# local copy of the amd64 'main' Packages index, read from inside the chroot
PACKAGES=$(mktemp "--tmpdir=$TMPDIR" sha1-pkgs-XXXXXXX)
schroot --directory "$SHA1DIR" -c "chroot:jenkins-reproducible-${RELEASE}-diffoscope" \
	cat "/var/lib/apt/lists/cdn-fastly.deb.debian.org_debian_dists_${RELEASE}_main_binary-amd64_Packages" > "$PACKAGES"

# the mode selects the order in which packages are visited; anything which is
# not a known gathering mode turns this run into a "results" run
case "$MODE" in
	random)		SORT="sort -R" ;;
	reverse)	SORT="sort -r" ;;
	forward)	SORT="sort" ;;
	*)		SORT="sort" ; MODE="results" ;;
esac
# $SORT is deliberately left unquoted: it holds a command plus its option
# shellcheck disable=SC2086
packages="$(awk '/^Package:/{print $2}' "$PACKAGES" | $SORT | xargs echo)"

reproducible_packages=
unreproducible_packages=
61

62
#######################################
# Print the summary (in "results" mode only) and remove the temporary files.
# This function is installed as INT/TERM/EXIT trap handler and is also called
# directly at the end of the script, so it must be safe to run more than once.
# Globals:
#   MODE, log, PACKAGES, SHA1DIR (read)
#   reproducible_packages, reproducible_count, unreproducible_packages,
#   unreproducible_count, percent_repro, percent_unrepro (written)
# Arguments:
#   none
# Outputs:
#   the summary on stdout
#######################################
cleanup_all() {
	local total_count
	# the log is deleted below, so a repeated invocation has nothing left
	# to summarise and must not try to read it again
	if [ "$MODE" = "results" ] && [ -r "$log" ]; then
		# field 9 is the package file name in lines of the form
		# "<date -u output, six fields> - REPRODUCIBLE: <package file> ..."
		reproducible_packages=$(awk '/ REPRODUCIBLE: /{print $9}' "$log")
		reproducible_count=$(echo $reproducible_packages | wc -w)
		unreproducible_packages=$(awk '/ UNREPRODUCIBLE: /{print $9}' "$log")
		unreproducible_count=$(echo $unreproducible_packages | wc -w)
		total_count=$(( reproducible_count + unreproducible_count ))
		if [ "$total_count" -gt 0 ]; then
			# multiply before dividing: with scale=4 dividing first
			# would truncate the ratio to four digits (1/3 -> 33.3300)
			percent_repro=$(echo "scale=4 ; $reproducible_count * 100 / $total_count" | bc)
			percent_unrepro=$(echo "scale=4 ; $unreproducible_count * 100 / $total_count" | bc)
		else
			# nothing was classified: avoid a division by zero in bc
			percent_repro=0
			percent_unrepro=0
		fi
		echo "-------------------------------------------------------------"
		echo "reproducible packages: $reproducible_count: $reproducible_packages"
		echo
		echo "unreproducible packages: $unreproducible_count: $unreproducible_packages"
		echo
		echo "reproducible packages: $reproducible_count: ($percent_repro%)"
		echo "unreproducible packages: $unreproducible_count: ($percent_unrepro%)"
		echo
		echo
		echo "$(du -sch "$SHA1DIR")"
		echo
	fi
	# -f: the files are already gone when this runs for the second time
	rm -f -- "$log" "$PACKAGES"
}

# make sure the summary is printed and the temporary files are removed,
# no matter how the script ends
trap cleanup_all INT TERM EXIT

# this is a tiny bit hackish, but also an elegant way to get rid of old locks...
# (locks are held for 30s only anyway and there is only a 3/60000th chance of a race condition)
# ${SHA1DIR:?} aborts instead of expanding to /*.lock if the variable is empty
rm -f "${SHA1DIR:?}"/*.lock

# Visit every package once. The whole loop runs in a subshell (it is the
# first stage of a pipeline); everything it prints is copied into $log.
for package in $packages ; do
	LOCK="$SHA1DIR/${package}.lock"
	if [ -e "$LOCK" ] ; then
		echo "$(date -u) - skipping locked package $package, sleeping a minute to deescalate."
		sleep 60
		continue
	else
		touch "$LOCK"
	fi
	version=$(grep-dctrl -X -P "${package}" -s version -n "$PACKAGES")
	arch=$(grep-dctrl -X -P "${package}" -s Architecture -n "$PACKAGES")
	# an epoch colon in the version is written as %3a in the file name
	package_file="${package}_$(echo $version | sed 's#:#%3a#')_${arch}.deb"
	# mirror the pool/ directory layout of the archive below $SHA1DIR
	pool_dir="$SHA1DIR/$(dirname $(grep-dctrl -X -P "${package}" -s Filename -n "$PACKAGES"))"
	mkdir -p "$pool_dir"
	cd "$pool_dir"
	if [ "$MODE" = "results" ] ; then
		# only report on what has been gathered before, never download
		if [ -e "${package_file}.json" ] ; then
			count=$(fmt "${package_file}.json" | grep -c '\.buildinfo' || true)
			SHA1SUM_PKG="$(cat "${package_file}.sha1output" | awk '{print $1}' )"
			if [ "${count}" -ge 2 ]; then
				echo "$(date -u) - REPRODUCIBLE: $package_file ($SHA1SUM_PKG) - reproduced $count times."
			else
				echo "$(date -u) - UNREPRODUCIBLE: $package_file ($SHA1SUM_PKG) on ftp.debian.org, but nowhere else."
			fi
		fi
		# release the lock before moving on, else it would stay behind
		# and make concurrently running jobs skip this package
		rm -f "$LOCK"
		continue
	fi
	if [ ! -e "${package_file}.sha1output" ] ; then
		echo -n "$(date -u) - downloading... "
		# keep the exit status of the download itself; behind a pipe into
		# xargs it would be lost and a failed download would go unnoticed
		download_status=0
		download_output=$(schroot --directory "$pool_dir" -c "chroot:jenkins-reproducible-${RELEASE}-diffoscope" apt-get download "${package}/${RELEASE}" 2>&1) || download_status=$?
		# xargs echo squeezes the (multi line) output onto a single line
		echo "$download_output" | xargs echo || true
		if [ "$download_status" -ne 0 ] || [ ! -f "${package_file}" ] ; then
			# do not create an empty .sha1output for a missing file
			echo "$(date -u) - WARNING: failed to download ${package_file}, skipping."
			rm -f "$LOCK"
			continue
		fi
		echo "$(date -u) - calculating sha1sum for ${package_file}"
		SHA1SUM_PKG="$(sha1sum "${package_file}" | tee "${package_file}.sha1output" | awk '{print $1}' )"
		# only the checksum is kept, not the package itself
		rm "${package_file}"
	else
		echo "$(date -u) - ${package_file} is known, gathering sha1sum"
		SHA1SUM_PKG="$(cat "${package_file}.sha1output" | awk '{print $1}' )"
	fi
	if [ ! -e "${package_file}.json" ]; then
		echo "$(date -u) - downloading .json for ${package_file} (${SHA1SUM_PKG}) from buildinfo.debian.net"
		# NOTE(review): wget -O creates the output file even when the
		# download fails, so a failed request leaves an empty .json
		# behind which is never fetched again - confirm this is intended.
		wget --quiet -O "${package_file}.json" "${bdn_url}/${SHA1SUM_PKG}" || echo "WARNING: failed to download ${bdn_url}/${SHA1SUM_PKG}"
	else
		echo "$(date -u) - reusing local copy of .json for ${package_file} (${SHA1SUM_PKG}) from buildinfo.debian.net"
	fi
	rm -f "$LOCK"
done | tee "$log"

# regular end of the script: summarise and clean up now, then drop the trap
# so that the handler is not run a second time on exit
cleanup_all
trap - INT TERM EXIT