Skip to content
Commits on Source (2)
......@@ -150,10 +150,24 @@ if [ "$HOSTNAME" = "$MAINNODE" ] ; then
#
echo "$(date -u) - Looking for unhealthy nodes."
cd ~/jobs
DUMMY_FILE=$(mktemp --tmpdir=$TMPDIR maintenance-XXXXXXX)
SICK=""
for i in reproducible_node_health_check_* ; do
for i in reproducible_node_health_check_* reproducible_maintenance_* ; do
# Derive the node alias and architecture from the job directory name in $i
# and select the per-job-type thresholds (FORCE_DATE, MAXDIFF) that the
# health evaluation of this job uses.
case $i in
reproducible_node_health_check_*)
# Split on '_': field 5 is taken as the architecture, field 6 as the node alias.
NODE_ALIAS=$(echo $i | cut -d '_' -f6)
NODE_ARCH=$(echo $i | cut -d '_' -f5)
# Cutoff timestamp: one hour ago, formatted from the UTC clock (minute precision).
FORCE_DATE=$(date -u -d "1 hour ago" '+%Y-%m-%d %H:%M')
MAXDIFF=4
;;
reproducible_maintenance_*)
# This prefix has two fewer '_'-separated words, so the same two pieces
# of information sit in fields 3 (architecture) and 4 (node alias).
NODE_ALIAS=$(echo $i | cut -d '_' -f4)
NODE_ARCH=$(echo $i | cut -d '_' -f3)
# Cutoff timestamp: five hours ago; MAXDIFF is lower (2) than for health check jobs.
FORCE_DATE=$(date -u -d "5 hour ago" '+%Y-%m-%d %H:%M')
MAXDIFF=2
;;
esac
# Set the modification time of the scratch file to the cutoff, so that it can
# act as the reference operand in file-age comparisons (-ot).
# NOTE(review): FORCE_DATE is produced with `date -u` but carries no zone
# suffix, and GNU touch -d reads zone-less strings as local time; the cutoff
# is only correct if this host runs with a UTC timezone - confirm.
touch -d "$FORCE_DATE" $DUMMY_FILE
case $NODE_ARCH in
amd64) NODE="profitbricks-build${NODE_ALIAS#profitbricks}-amd64.debian.net" ;;
i386) NODE="profitbricks-build${NODE_ALIAS#profitbricks}-i386.debian.net" ;;
......@@ -174,7 +188,9 @@ if [ "$HOSTNAME" = "$MAINNODE" ] ; then
fi
if [ $DIFF -eq -1 ] ; then
echo "Problems analysing $i build logs, ignoring $NODE."
elif [ $DIFF -gt 4 ] ; then
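# either the diff is greater than $MAXDIFF (=the last $MAXDIFF job runs failed)
# or the last successful run is older than $FORCE_DATE, i.e. more than 1 hour ago for
# health check jobs and more than 5 hours ago for maintenance jobs (=a job is still running/hanging)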
elif [ $DIFF -gt $MAXDIFF ] || [ $LAST -ot $DUMMY_FILE ] ; then
echo -n "$i job has issues since more than an hour"
if grep -q $NODE ~/offline_nodes >/dev/null 2>&1 ; then
echo " and $NODE already marked as offline, good."
......@@ -198,6 +214,7 @@ if [ "$HOSTNAME" = "$MAINNODE" ] ; then
fi
irc_message debian-reproducible "$MESSAGE To make this permanent, edit jenkins-home/offline_nodes in git."
fi
rm -f $DUMMY_FILE
fi
echo "$(date -u) - updating the schroots and pbuilder now..."
......