#!/bin/bash

# Copyright 2015-2018 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

# DEBUG is assigned before the helpers below are sourced.
DEBUG=false
# common_init is called right after sourcing common-functions.sh and gets
# the script's own arguments.
. /srv/jenkins/bin/common-functions.sh
common_init "$@"

# common code
. /srv/jenkins/bin/reproducible_common.sh

# from here on, abort on the first unhandled error
set -e

#
# Trap handler: end the schroot session named by the global $SESSION.
#
# This is best effort on purpose: the session may never have been started or
# may already have been ended, so errors from schroot are discarded and the
# function always returns 0.
#
cleanup_all() {
	# $SESSION is quoted so that it always reaches schroot as one argument
	schroot --end-session -c "$SESSION" 2>/dev/null || true
}

#
# Sync our database and build schedule with the current Arch Linux repositories.
#
# Steps, in order:
#  1. refresh pacman inside a schroot session and export "repo pkgbase version"
#     triples to ${ARCHLINUX_PKGS}_full_pkgbase_list
#  2. late in the day only: drop packages which vanished from the repositories
#     (from the filesystem below $BASE/archlinux and from the database)
#  3. add new packages to the database and schedule them, reschedule known
#     packages for which the repository has a newer version
#  4. schedule packages in DEPWAIT_/404_ states and, if the queue is short,
#     the packages with the oldest results
#  5. report what has been scheduled via irc_message
#
# Globals read: ARCH SESSION BASE ARCHLINUX_PKGS ARCHLINUX_REPOS
#               ARCHLINUX_BLACKLISTED
# Calls:        query_db, irc_message, schroot
#
update_archlinux_repositories() {
	#
	# init
	#
	# declarations are kept separate from the assignments so that a failing
	# command substitution is not masked by the exit status of 'local'
	local UPDATED NEW KNOWN BLACKLIST TOTAL total
	local REPO DIR PKG SUITE PKG_ID PKG_IN_DB VERSION VERCMP SCHEDULED
	local repo pkgbase pkgname version
	local REMOVED=0 REMOVE_LIST="" MESSAGE
	local TMPPKGLIST DATE QUERY MAX MINDATE SCHDATE THRESHOLD CURRENT
	local OLD="" DEPWAIT404=""
	local new updated old=0 depwait404=0 message msg_old msg_depwait404
	local PKGBASE_LIST="${ARCHLINUX_PKGS}_full_pkgbase_list"

	UPDATED=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
	NEW=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
	KNOWN=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
	# Anchored extended regex matching exactly the blacklisted package names.
	# $ARCHLINUX_BLACKLISTED is deliberately unquoted to normalise whitespace.
	# NOTE(review): this assumes the entries are plain package names - confirm.
	BLACKLIST="^($(echo $ARCHLINUX_BLACKLISTED | sed "s# #|#g"))\$"
	TOTAL=$(cat "${ARCHLINUX_PKGS}"_* | wc -l)
	echo "$(date -u ) - $TOTAL Arch Linux packages were previously known to Arch Linux."
	# $KNOWN is later parsed as 'suite|name|version', one package per line
	query_db "SELECT suite, name, version FROM sources WHERE architecture='$ARCH';" > "$KNOWN"
	echo "$(date -u ) - $(wc -l < "$KNOWN") Arch Linux packages are known in our database."
	# init session
	schroot --begin-session --session-name="$SESSION" -c jenkins-reproducible-archlinux
	echo "$(date -u ) - updating pacman's knowledge of Arch Linux repositories (by running 'pacman -Syu --noconfirm')."
	schroot --run-session -c "$SESSION" --directory /var/tmp -- sudo pacman -Syu --noconfirm

	#
	# Get a list of unique package bases.  Non-split packages don't have a pkgbase set
	# so we need to use the pkgname for them instead.
	#
	echo "$(date -u ) - exporting pacman's knowledge of Arch Linux repositories to the filesystem (by running 'expac -S...')."
	schroot --run-session -c "$SESSION" --directory /var/tmp -- expac -S '%r %e %n %v' | \
		while read -r repo pkgbase pkgname version; do
			if [[ "$pkgbase" = "(null)" ]]; then
				pkgbase=$pkgname
			fi
			printf '%s %s %s\n' "$repo" "$pkgbase" "$version"
		done | sort -u -R > "$PKGBASE_LIST"	# -R: unique lines in random order
	TOTAL=$(wc -l < "$PKGBASE_LIST")
	echo "$(date -u ) - $TOTAL Arch Linux packages are now known to Arch Linux."
	total=$(query_db "SELECT count(*) FROM sources AS s JOIN schedule AS sch ON s.id=sch.package_id WHERE s.architecture='$ARCH' AND sch.date_build_started IS NULL;")
	echo "$(date -u) - updating Arch Linux repositories, currently $total packages scheduled."

	#
	# remove packages which are gone (only when run between 22:00 and 23:59
	# local time, as the check below is 'hour > 21')
	#
	if [ "$(date +'%H')" -gt 21 ] ; then
		for REPO in $ARCHLINUX_REPOS ; do
			echo "$(date -u ) - dropping removed packages from filesystem in repository '$REPO':"
			# Only the direct children of the repository directory are packages.
			# The list is read via fd 3 so that commands in the loop body can
			# not eat it from stdin.
			while IFS= read -r -u 3 DIR ; do
				PKG=$(basename "$DIR")
				# exact comparison of repository and package name (forced to be a
				# string comparison), so that 'foo' is not kept alive by 'foobar'
				if ! awk -v r="$REPO" -v p="$PKG" \
					'($1 "") == (r "") && ($2 "") == (p "") { found = 1; exit } END { exit !found }' \
					"$PKGBASE_LIST" ; then
					# we could check here whether a package is currently building,
					# and if so defer the pkg removal. (but I think this is pointless,
					# as we are unlikely to kill that build, so meh, let it finish
					# and fail to update the db, because the package is gone...)
					REMOVED=$((REMOVED + 1))
					REMOVE_LIST="$REMOVE_LIST $REPO/$PKG"
					# ${BASE:?} aborts instead of removing below / if BASE is empty
					rm -r --one-file-system "${BASE:?}/archlinux/$REPO/$PKG"
					echo "$(date -u) - $REPO/$PKG removed as it's gone from the Archlinux repositories."
					SUITE="archlinux_$REPO"
					PKG_ID=$(query_db "SELECT id FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
					if [ -n "${PKG_ID}" ] ; then
						query_db "DELETE FROM results WHERE package_id='${PKG_ID}';"
						query_db "DELETE FROM schedule WHERE package_id='${PKG_ID}';"
						query_db "DELETE FROM sources WHERE id='${PKG_ID}';"
						echo "$(date -u) - $SUITE $PKG removed from database."
					else
						echo "$(date -u) - $SUITE $PKG not found in database."
					fi
				fi
			done 3< <(find "$BASE/archlinux/$REPO" -mindepth 1 -maxdepth 1 -type d | sort)
		done
		if [ "$REMOVED" -ne 0 ] ; then
			MESSAGE="deleted $REMOVED packages: $REMOVE_LIST"
			echo -n "$(date -u ) - "
			irc_message archlinux-reproducible "$MESSAGE"
		fi
	fi

	#
	# schedule packages
	#
	for REPO in $ARCHLINUX_REPOS ; do
		TMPPKGLIST=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
		echo "$(date -u ) - updating database with available packages in repository '$REPO'."
		DATE="$(date -u +'%Y-%m-%d %H:%M')"
		# The trailing space keeps repositories sharing a prefix apart.
		# The loop runs in a subshell (pipeline), so results are handed back
		# through the files $NEW, $UPDATED and $TMPPKGLIST.
		grep "^$REPO " "$PKGBASE_LIST" | \
			while read -r repo pkgbase version; do
				PKG=$pkgbase
				SUITE="archlinux_$repo"
				# first line in $KNOWN whose suite and name fields match exactly
				PKG_IN_DB=$(awk -F'|' -v s="$SUITE" -v p="$PKG" \
					'($1 "") == (s "") && ($2 "") == (p "") { print; exit }' "$KNOWN")
				VERSION=$(printf '%s\n' "$PKG_IN_DB" | cut -d '|' -f3)
				if [ -z "${PKG_IN_DB}" ] ; then
					# new package, add to db and schedule
					echo "$REPO/$pkgbase" >> "$NEW"
					echo "new package found: $repo/$pkgbase $version "
					query_db "INSERT into sources (name, version, suite, architecture) VALUES ('$PKG', '$version', '$SUITE', '$ARCH');"
					PKG_ID=$(query_db "SELECT id FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
					query_db "INSERT INTO schedule (package_id, date_scheduled) VALUES ('${PKG_ID}', '$DATE');"
				elif [ "$VERSION" != "$version" ] ; then
					# vercmp prints 1 / 0 / -1; it reads no input, so keep it away
					# from the package list on the loop's stdin
					VERCMP="$(schroot --run-session -c "$SESSION" --directory /var/tmp -- vercmp "$version" "$VERSION" < /dev/null || true)"
					if [ "$VERCMP" = "1" ] ; then
						# known package with new version, so update db and schedule
						echo "$REPO/$pkgbase" >> "$UPDATED"
						echo "$REPO/$pkgbase $VERSION is known in the database, but repo now has $version which is newer, so rescheduling... "
						query_db "UPDATE sources SET version = '$version' WHERE name = '$PKG' AND suite = '$SUITE' AND architecture='$ARCH';"
						if [ -n "$ARCHLINUX_BLACKLISTED" ] && printf '%s\n' "$PKG" | grep -Eq -- "$BLACKLIST" ; then
							echo "$PKG is blacklisted, so not scheduling it."
						else
							PKG_ID=$(query_db "SELECT id FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
							echo " SELECT * FROM schedule WHERE package_id = '${PKG_ID}';"
							SCHEDULED=$(query_db "SELECT * FROM schedule WHERE package_id = '${PKG_ID}';")
							if [ -z "$SCHEDULED" ] ; then
								echo " INSERT INTO schedule (package_id, date_scheduled) VALUES ('${PKG_ID}', '$DATE');"
								query_db "INSERT INTO schedule (package_id, date_scheduled) VALUES ('${PKG_ID}', '$DATE');"
							else
								echo " $PKG (package_id: ${PKG_ID}) already scheduled, not scheduling again."
							fi
						fi
					elif [ "$VERCMP" = "-1" ] ; then
						# our version is higher than what's in the repo because we build trunk
						echo "$REPO/$pkgbase $VERSION in db is higher than $version in repo because we build trunk."
					else
						echo " Boom boom boom boom boom."
						echo " This should never happen: we know about $pkgbase with $VERSION, but repo has $version. VERCMP=$VERCMP"
						echo " PKG_IN_DB=${PKG_IN_DB}"
					fi
				fi

				printf '%s %s\n' "$pkgbase" "$version" >> "$TMPPKGLIST"
			done
		mv "$TMPPKGLIST" "${ARCHLINUX_PKGS}_${REPO}"
		# grep -c exits non-zero when it counts 0 lines, hence '|| true'
		new=$(grep -c "^$REPO/" "$NEW" || true)
		updated=$(grep -c "^$REPO/" "$UPDATED" || true)
		echo "$(date -u ) - scheduled $new/$updated packages in repository '$REPO'."
	done
	schroot --end-session -c "$SESSION"

	#
	# schedule up to $MAX packages in DEPWAIT_ or 404_ states
	# (which have been tried at least 24h ago)
	#
	echo "$(date -u ) - should we schedule packages in DEPWAIT_ or 404_ states?"
	MAX=350
	MINDATE=$(date -u +"%Y-%m-%d %H:%M" -d "24 hours ago")
	SCHDATE=$(date -u +"%Y-%m-%d %H:%M" -d "7 days")
	QUERY="SELECT s.id FROM sources AS s
		JOIN results as r on s.id=r.package_id
		WHERE s.architecture='$ARCH'
		AND (r.status LIKE 'DEPWAIT%' or r.status LIKE '404%')
		AND r.build_date < '$MINDATE'
		AND s.id NOT IN (SELECT package_id FROM schedule)
		LIMIT $MAX;"
	DEPWAIT404=$(query_db "$QUERY")
	# Count the non-empty lines. 'echo -n ... | wc -l' would be off by one,
	# as the last line has no trailing newline.
	depwait404=$(grep -c . <<<"$DEPWAIT404" || true)
	if [ -n "$DEPWAIT404" ] ; then
		for PKG_ID in $DEPWAIT404 ; do
			QUERY="INSERT INTO schedule (package_id, date_scheduled) VALUES ('${PKG_ID}', '$SCHDATE');"
			query_db "$QUERY"
		done
		echo "$(date -u ) - done scheduling $depwait404 packages in DEPWAIT_ or 404_ states."
	else
		echo "$(date -u ) - no."
	fi

	#
	# schedule up to $MAX packages we already know about
	# (only if less than $THRESHOLD packages are currently scheduled)
	#
	echo "$(date -u ) - should we schedule old packages?"
	MAX=350
	THRESHOLD=450
	MINDATE=$(date -u +"%Y-%m-%d %H:%M" -d "10 days ago")
	SCHDATE=$(date -u +"%Y-%m-%d %H:%M" -d "7 days")
	CURRENT=$(query_db "SELECT count(*) FROM sources AS s JOIN schedule AS sch ON s.id=sch.package_id WHERE s.architecture='$ARCH' AND sch.date_build_started IS NULL;")
	if [ "$CURRENT" -le "$THRESHOLD" ] ; then
		echo "$(date -u ) - scheduling $MAX old packages."
		QUERY="SELECT s.id, s.name, max(r.build_date) max_date
			FROM sources AS s JOIN results AS r ON s.id = r.package_id
			WHERE s.architecture='$ARCH'
			AND r.status != 'blacklisted'
			AND r.build_date < '$MINDATE'
			AND s.id NOT IN (SELECT schedule.package_id FROM schedule)
			GROUP BY s.id, s.name
			ORDER BY max_date
			LIMIT $MAX;"
		OLD=$(query_db "$QUERY")
		old=$(grep -c . <<<"$OLD" || true)
		# the first '|' separated field of each result line is the package id
		for PKG_ID in $(cut -d '|' -f1 <<<"$OLD") ; do
			QUERY="INSERT INTO schedule (package_id, date_scheduled) VALUES ('${PKG_ID}', '$SCHDATE');"
			query_db "$QUERY"
		done
		echo "$(date -u ) - done scheduling $old old packages."
	else
		echo "$(date -u ) - no."
	fi

	#
	# output stats
	#
	rm "$PKGBASE_LIST"
	total=$(query_db "SELECT count(*) FROM sources AS s JOIN schedule AS sch ON s.id=sch.package_id WHERE s.architecture='$ARCH' AND sch.date_build_started IS NULL;")
	new=$(wc -l < "$NEW")
	updated=$(wc -l < "$UPDATED")
	if [ "$new" -ne 0 ] || [ "$updated" -ne 0 ] || [ "$old" -ne 0 ] || [ "$depwait404" -ne 0 ] ; then
		message="scheduled"
		if [ "$new" -ne 0 ] ; then
			message="$message $new new package"
			if [ "$new" -gt 1 ] ; then
				message="${message}s"
			fi
		fi
		if [ "$new" -ne 0 ] && [ "$updated" -ne 0 ] ; then
			message="$message and"
		fi
		if [ "$updated" -ne 0 ] ; then
			if [ "$updated" -gt 1 ] ; then
				message="$message $updated packages with newer versions"
			else
				message="$message $updated package with newer version"
			fi
		fi
		if [ "$old" -ne 0 ] && { [ "$new" -ne 0 ] || [ "$updated" -ne 0 ] ; } ; then
			msg_old=", plus $old already tested ones"
		elif [ "$old" -ne 0 ] ; then
			msg_old=" $old already tested packages"
		else
			msg_old=""
		fi
		if [ "$depwait404" -ne 0 ] && { [ "$new" -ne 0 ] || [ "$updated" -ne 0 ] || [ "$old" -ne 0 ] ; } ; then
			msg_depwait404=" and $depwait404 packages with dependency or 404 problems"
		elif [ "$depwait404" -ne 0 ] ; then
			msg_depwait404=" $depwait404 packages with dependency or 404 problems"
		else
			msg_depwait404=""
		fi
		MESSAGE="${message}${msg_old}${msg_depwait404}, for $total scheduled out of $TOTAL."
		echo -n "$(date -u ) - "
		irc_message archlinux-reproducible "$MESSAGE"
	else
		echo "$(date -u ) - didn't schedule any packages."
	fi
	rm -f "$NEW" "$UPDATED" "$KNOWN" > /dev/null
}

# ARCH and SESSION are set before the trap is armed, as cleanup_all needs
# $SESSION to end the schroot session.
ARCH="x86_64"
SESSION="archlinux-scheduler-$RANDOM"
trap cleanup_all INT TERM EXIT
update_archlinux_repositories
# the function ended the session itself, so the handler is no longer needed
trap - INT TERM EXIT

# vim: set sw=0 noet :