#!/bin/bash

# Copyright 2019 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2+
#
# based on an idea by Vagrant Cascadian <vagrant@debian.org>
# see https://lists.reproducible-builds.org/pipermail/rb-general/2018-October/001239.html

# Debug switch, set before the helper libraries are sourced
# (presumably consulted by them — TODO confirm).
DEBUG=false
# Load the shared Jenkins helper functions and run their initialisation
# hook with this script's own command line arguments.
. /srv/jenkins/bin/common-functions.sh
common_init "$@"

# common code defining db access
. /srv/jenkins/bin/reproducible_common.sh

# From here on, abort on the first unhandled command failure.
set -e

# TODOs:
# - ${package_file}.sha1output includes ${package_file} in the file name and contents
# - GRAPH
# - save results in db
# - loop through all packages known in db
# - show results in 'normal pages'
# - store date when a package was last reproduced... (and constantly do that...)
# - throw away results (if none has been|which have not) signed with a tests.r-b.o key
# - rebuilder:
#   - run on osuosl173
#   - loop randomly through unreproducible packages first, then reproducible ones. do one attempt only.
#   - one job run tests one package.
#   - use the same debuild options if possible? or try all sane options?
#   - submit .buildinfo file to b.d.n and then fetch the json again.
#   - debootstrap stretch and upgrade from there?
# - this is all amd64 only for a start

# Global setup: pick the operating mode, announce it, create the temporary
# work files and build the list of binary package names to process.
#
# MODE (first argument) selects the order in which the runners walk the
# package list: "random", "reverse" or "forward". Anything else, including
# no argument at all, selects "results" mode.
RELEASE=buster
MODE="$1"

# Normalise MODE *before* it is consulted anywhere, so that the banner below
# also reports "results" when the script is called without (or with an
# unknown) argument.
case "$MODE" in
	random)		SORT="sort -u -R" ;;
	reverse)	SORT="sort -u -r" ;;
	forward)	SORT="sort -u" ;;
	*)		SORT="sort -u" ; MODE="results" ;;
esac

echo
echo
echo -n 'this is an early prototype...'
if [ "$MODE" = "results" ] ; then
	echo 'this job will show results based on data gathered in other jobs.'
else
	echo 'this job gathers data but does not show results.'
fi
echo
echo

# NOTE(review): TMPDIR is presumably set up by common_init; fall back to /tmp
# so that mktemp never receives an empty --tmpdir= argument.
bdn_url="https://buildinfo.debian.net/api/v1/buildinfos/checksums/sha1"
log=$(mktemp --tmpdir="${TMPDIR:-/tmp}" sha1-log-XXXXXXX)
echo "$(date -u) - logfile used is $log"

# FORCE_DATE is not referenced anywhere in the visible part of this script.
FORCE_DATE=$(date -u -d "14 days ago" '+%Y-%m-%d')
# DUMMY_FILE carries the timestamp "today, 00:00 UTC"; .json files older than
# it get downloaded again. The date is taken in UTC as well, so that the
# timestamp cannot end up in the future on hosts with a non-UTC timezone.
DUMMY_FILE=$(mktemp --tmpdir="${TMPDIR:-/tmp}" sha1-date-XXXXXXX)
touch -d "$(date -u '+%Y-%m-%d') 00:00 UTC" "$DUMMY_FILE"

SHA1DIR=/srv/reproducible-results/debian-sha1
mkdir -p "$SHA1DIR"

# Copy the Packages index for $RELEASE/main/amd64 out of the chroot.
PACKAGES=$(mktemp --tmpdir="${TMPDIR:-/tmp}" sha1-pkgs-XXXXXXX)
schroot --directory "$SHA1DIR" -c "chroot:jenkins-reproducible-${RELEASE}-diffoscope" cat "/var/lib/apt/lists/cdn-fastly.deb.debian.org_debian_dists_${RELEASE}_main_binary-amd64_Packages" > "$PACKAGES"

# In results mode, collect the paths of all existing (UN)REPRODUCIBLE marker
# files of this release.
if [ "$MODE" = "results" ] ; then
	RESULTS=$(mktemp --tmpdir="${TMPDIR:-/tmp}" sha1-results-XXXXXXX)
	find "$SHA1DIR" -name "*REPRODUCIBLE.$RELEASE" > "$RESULTS"
fi

# $SORT is left unquoted on purpose: it holds a command plus its options.
# shellcheck disable=SC2086
packages="$(awk '/^Package:/{print $2}' "$PACKAGES" | $SORT | xargs echo)"

reproducible_packages=
unreproducible_packages=

# Print COUNT ($1) as a percentage of TOTAL ($2), using the same bc
# expression (scale=4) as the statistics always did. Prints 0 instead of
# dividing by zero when TOTAL is 0.
_sha1_percent() {
	local count=$1 total=$2
	if [ "$total" -eq 0 ] ; then
		echo 0
	else
		echo "scale=4 ; $count / ($total) * 100" | bc
	fi
}

# Print how many lines of the newline separated list LIST ($2) match the
# extended regular expression PATTERN ($1). Prints 0 if nothing matches.
_sha1_count_matching() {
	local pattern=$1 list=$2
	# grep -c exits non-zero when the count is 0, which is not an error here.
	grep -E -c -- "$pattern" <<<"$list" || true
}

# Print the statistics (results mode only) and remove the temporary files.
#
# Globals read:    MODE, RELEASE, SHA1DIR, log, PACKAGES, RESULTS, DUMMY_FILE
# Globals written: unknown_packages, reproducible_packages,
#                  unreproducible_packages and the *_count, *_binnmu,
#                  *_arch_* and percent_* variables
# Outputs:         the statistics on stdout
#
# The function is installed as a trap handler and also called explicitly,
# so it is safe to call more than once: the statistics are skipped when the
# log file is already gone and all removals use "rm -f".
cleanup_all() {
	local total result_files tmpfile
	local repro_re unrepro_re binnmu_re
	if [ "$MODE" = "results" ] && [ -r "${log:-}" ] ; then
		# Log lines look like "<date> - <STATE>: <package>", so the package
		# name is the last field regardless of the locale's date format.
		unknown_packages=$(awk '/ UNKNOWN: /{print $NF}' "$log")
		unknown_count=$(( $(wc -w <<<"$unknown_packages") ))
		reproducible_packages=$(awk '/ REPRODUCIBLE: /{print $NF}' "$log")
		reproducible_count=$(( $(wc -w <<<"$reproducible_packages") ))
		unreproducible_packages=$(awk '/ UNREPRODUCIBLE: /{print $NF}' "$log")
		unreproducible_count=$(( $(wc -w <<<"$unreproducible_packages") ))
		total=$(( reproducible_count + unreproducible_count + unknown_count ))

		# Walk the result tree once and classify the marker files by name.
		result_files=$(find "$SHA1DIR" -type f || true)
		repro_re='\.deb\.REPRODUCIBLE\.'"${RELEASE}"'$'
		unrepro_re='\.deb\.UNREPRODUCIBLE\.'"${RELEASE}"'$'
		# binNMU versions end in "+b<number>"; the number may have several digits.
		binnmu_re='\+b[0-9]+_(all|amd64)'
		reproducible_binnmu=$(_sha1_count_matching "${binnmu_re}${repro_re}" "$result_files")
		unreproducible_binnmu=$(_sha1_count_matching "${binnmu_re}${unrepro_re}" "$result_files")
		reproducible_arch_all=$(_sha1_count_matching "_all${repro_re}" "$result_files")
		unreproducible_arch_all=$(_sha1_count_matching "_all${unrepro_re}" "$result_files")
		reproducible_arch_amd64=$(_sha1_count_matching "_amd64${repro_re}" "$result_files")
		unreproducible_arch_amd64=$(_sha1_count_matching "_amd64${unrepro_re}" "$result_files")

		percent_unknown=$(_sha1_percent "$unknown_count" "$total")
		percent_repro=$(_sha1_percent "$reproducible_count" "$total")
		percent_unrepro=$(_sha1_percent "$unreproducible_count" "$total")
		percent_binnmu_repro=$(_sha1_percent "$reproducible_binnmu" "$total")
		percent_binnmu_unrepro=$(_sha1_percent "$unreproducible_binnmu" "$total")
		percent_arch_all_repro=$(_sha1_percent "$reproducible_arch_all" "$total")
		percent_arch_all_unrepro=$(_sha1_percent "$unreproducible_arch_all" "$total")
		percent_arch_amd64_repro=$(_sha1_percent "$reproducible_arch_amd64" "$total")
		percent_arch_amd64_unrepro=$(_sha1_percent "$unreproducible_arch_amd64" "$total")

		echo "-------------------------------------------------------------"
		echo
		echo "ftp.debian.org package reproducibility statistics"
		echo "-------------------------------------------------------------"
		echo "packages in unknown reproducibility state in $RELEASE/amd64: $unknown_count: ($percent_unknown%)"
		echo "reproducible packages in $RELEASE/amd64: $reproducible_count: ($percent_repro%)"
		echo "unreproducible packages in $RELEASE/amd64: $unreproducible_count: ($percent_unrepro%)"
		echo "total number of packages in $RELEASE/amd64: $total"
		echo
		echo
		echo "reproducible binNMUs in $RELEASE/amd64: $reproducible_binnmu: ($percent_binnmu_repro%)"
		echo "unreproducible binNMU in $RELEASE/amd64: $unreproducible_binnmu: ($percent_binnmu_unrepro%)"
		echo
		echo "reproducible arch:all packages in $RELEASE/amd64: $reproducible_arch_all: ($percent_arch_all_repro%)"
		echo "unreproducible arch:all packages in $RELEASE/amd64: $unreproducible_arch_all: ($percent_arch_all_unrepro%)"
		echo "reproducible arch:amd64 packages in $RELEASE/amd64: $reproducible_arch_amd64: ($percent_arch_amd64_repro%)"
		echo "unreproducible arch:amd64 packages in $RELEASE/amd64: $unreproducible_arch_amd64: ($percent_arch_amd64_unrepro%)"
		echo
		echo
		echo "$(du -sch "$SHA1DIR" 2>/dev/null)"
		echo
	fi
	# Remove every temporary file this script may have created; variables
	# that are unset or empty in the current mode are skipped.
	for tmpfile in "${RESULTS:-}" "${log:-}" "${PACKAGES:-}" "${DUMMY_FILE:-}" ; do
		[ -z "$tmpfile" ] || rm -f -- "$tmpfile"
	done
}

# Run cleanup_all when the script exits or is interrupted.
trap cleanup_all INT TERM EXIT

rm -f "$SHA1DIR"/*.lock	# this is a tiny bit hackish, but also an elegant way to get rid of old locks...
			# (locks are held for 30s only anyway and there is a 3/60000th chance of a race condition only anyway)

# Results mode: classify every known package by looking for its marker files
# in $RESULTS and write one "<date> - <STATE>: <package>" line per package
# to stdout and to $log. The statistics are then printed by the EXIT trap.
if [ "$MODE" = "results" ] ; then
	for package in $packages ; do
		# -F: package names may contain '.' and '+', which must not be
		# interpreted as regular expression syntax.
		result=$(grep -F -- "/${package}_" "$RESULTS" || true)
		if [ -n "$result" ] ; then
			case "$result" in
				*UNREPRODUCIBLE*)
					#package_file=$(echo $result | sed 's#\.deb\.UNREPRODUCIBLE\.buster$#.deb#' )
					#count=1
					#SHA1SUM_PKG="$(cat ${package_file}.sha1output | awk '{print $1}' )"
					#package_file=$(basename $package_file)
					#echo "$(date -u) - UNREPRODUCIBLE: $package_file ($SHA1SUM_PKG) only on ftp.debian.org."
					echo "$(date -u) - UNREPRODUCIBLE: $package"
					;;
				*)
					#package_file=$(echo $result | sed 's#\.deb\.REPRODUCIBLE\.buster$#.deb#' )
					#count=$(cat ${package_file}.REPRODUCIBLE.$RELEASE)
					#SHA1SUM_PKG="$(cat ${package_file}.sha1output | awk '{print $1}' )"
					#package_file=$(basename $package_file)
					#echo "$(date -u) - REPRODUCIBLE: $package_file ($SHA1SUM_PKG) - reproduced $count times."
					echo "$(date -u) - REPRODUCIBLE: $package"
					;;
			esac
			continue
		fi
		echo "$(date -u) - UNKNOWN: $package"
	done | tee "$log"
	exit
fi

# only used by the runners
#
# For every package: take a lock file, download the .deb once to record its
# sha1sum, ask buildinfo.debian.net which .buildinfo files know that
# checksum, and write a REPRODUCIBLE (two or more hits) or UNREPRODUCIBLE
# marker file next to the recorded checksum. All output is appended to $log.
for package in $packages ; do
	LOCK="$SHA1DIR/${package}.lock"
	if [ -e "$LOCK" ] ; then
		echo "$(date -u) - skipping locked package $package"
		continue
	else
		touch "$LOCK"
	fi
	version=$(grep-dctrl -X -P "${package}" -s Version -n "$PACKAGES" | head -1)
	arch=$(grep-dctrl -X -P "${package}" -s Architecture -n "$PACKAGES" | head -1)
	# the epoch separator ':' is replaced by '%3a' to build the file name
	package_file="${package}_$(echo "$version" | sed 's#:#%3a#')_${arch}.deb"
	pool_dir="$SHA1DIR/$(dirname "$(grep-dctrl -X -P "${package}" -s Filename -n "$PACKAGES" | head -1)")"
	mkdir -p "$pool_dir"
	cd "$pool_dir"
	if [ ! -e "${package_file}.sha1output" ] ; then
		echo -n "$(date -u) - downloading... "
		# Capture the exit status of apt-get itself; piping it straight into
		# xargs would hide a failed download.
		download_rc=0
		download_output=$(schroot --directory "$pool_dir" -c "chroot:jenkins-reproducible-${RELEASE}-diffoscope" apt-get download "${package}/${RELEASE}" 2>&1) || download_rc=$?
		printf '%s\n' "$download_output" | xargs echo || true
		if [ "$download_rc" -ne 0 ] || [ ! -e "${package_file}" ] ; then
			echo "$(date -u) - WARNING: failed to download ${package_file}, skipping $package"
			# release the lock, otherwise the package stays locked for the
			# rest of this run
			rm -f "$LOCK"
			continue
		fi
		echo "$(date -u) - calculating sha1sum for ${package_file}"
		sha1sum "${package_file}" > "${package_file}.sha1output"
		rm "${package_file}"
		# Marker files of other versions of this package are now outdated.
		old_results=( "${package}"_*REPRODUCIBLE."$RELEASE" )
		if [ -e "${old_results[0]}" ] ; then
			echo "$(date -u) - $package was updated, deleting results for old version."
			rm -f -- "${old_results[@]}"
		fi
	fi
	# Always take the checksum from the recorded file, so that it is also
	# correct when the download step above was skipped for this package.
	SHA1SUM_PKG="$(awk '{print $1}' "${package_file}.sha1output")"
	# download .json if non existing or older than $DUMMY_FILE
	if [ ! -e "${package_file}.json" ] || [ "${package_file}.json" -ot "$DUMMY_FILE" ]; then
		echo "$(date -u) - downloading .json for ${package_file} (${SHA1SUM_PKG}) from buildinfo.debian.net"
		wget --quiet -O "${package_file}.json" "${bdn_url}/${SHA1SUM_PKG}" || echo "WARNING: failed to download ${bdn_url}/${SHA1SUM_PKG}"
		# the number of '.buildinfo' mentions in the answer is taken as the
		# number of builds which produced this checksum
		count=$(fmt "${package_file}.json" | grep -c '\.buildinfo' || true)
		if [ "${count:-0}" -ge 2 ]; then
			# drop a marker of the opposite state left over from an earlier
			# check, else results mode keeps reporting the old state
			rm -f "${package_file}.UNREPRODUCIBLE.$RELEASE"
			echo "$count" > "${package_file}.REPRODUCIBLE.$RELEASE"
			echo "$(date -u) - REPRODUCIBLE: $package_file ($SHA1SUM_PKG) - reproduced $count times."
		else
			rm -f "${package_file}.REPRODUCIBLE.$RELEASE"
			echo 1 > "${package_file}.UNREPRODUCIBLE.$RELEASE"
			echo "$(date -u) - UNREPRODUCIBLE: $package_file ($SHA1SUM_PKG) only on ftp.debian.org."
		fi
	else
		echo "$(date -u) - not updating data about ${package_file}"
	fi
	rm -f "$LOCK"
done | tee -a "$log"

# Regular end of a runner: clean up explicitly, then disarm the trap so that
# cleanup_all does not run a second time on exit.
cleanup_all
trap - INT TERM EXIT