Skip to content
Commits on Source (2)
#!/bin/sh
# This script should be called on Alioth to gather all machine readable files
# (debian/control, changelog, copyright, upstream metadata, README.Debian)
# of the VCSes mentioned in SVNDIRS and GITDIRS.
#
# TODO: store file <pkg>.vcs with the Vcs URL where the files were found

# Base name shared by the work directory, the log file and the main archive.
TDNAME=machine-readable
# Tarballs written at the end of the run.
MACHINEREADABLEARCHIVE=/srv/home/groups/blends/htdocs/${TDNAME}/${TDNAME}.tar.bz2
READMEDEBIANARCHIVE=/srv/home/groups/blends/htdocs/${TDNAME}/README.Debian.tar.bz2
# Work directory that receives one sub directory per first letter of a
# source package name.
TARGETDIR=${HOME}/$TDNAME
mkdir -p "$TARGETDIR"
# Log of everything that could not be fetched or parsed; restarted on each run.
ERRLOG=${HOME}/${TDNAME}.log
rm -f "$ERRLOG"
# Subversion locations to scan, one per line.  Each entry is handed to
# svn_checkout_machine_readable, which appends it to svn://localhost/svn/.
# The list is split on whitespace when iterated, so entries must not
# contain blanks.
SVNDIRS="debian-med/trunk/packages
debian-science/packages
debichem/unstable
debichem/wnpp
debichem/experimental
debichem/non-free
pkg-games/packages
pkg-grass/packages
pkg-java
pkg-multimedia/unstable
demudi"
# Git locations to scan, one per line.  Each entry names a directory below
# /git/ whose direct "*.git" sub directories are processed; the part before
# the first "/" is recorded as "Blend:" in the generated .vcs file.
GITDIRS="debian-astro/packages
debian-edu
debian-edu/pkg-team
debian-med
debian-science/packages
debichem/packages
3dprinter/packages
debhelper
lintian
pkg-a11y
pkg-exppsy
pkg-games
pkg-grass
pkg-hamradio
pkg-lynx
pkg-osm
pkg-multimedia
pkg-perl/packages
pkg-octave
tryton
demudi"
# Export every README.Debian variant listed in $getfile from Subversion and
# store it below $TARGETDIR.
#
# Arguments: $1 - repository path below svn://localhost/svn/
# Globals:   getfile (read) - list of paths relative to $1, one per line
#            TARGETDIR, firstletter, srcname, pkg, ERRLOG (read)
# Outputs:   a plain debian/README.Debian is stored as
#            $TARGETDIR/$firstletter/${srcname}.README.Debian; any other
#            debian/NAME.README.Debian keeps its name and is stored in the
#            directory named after its own first letter.  For every file a
#            "README.Debian: NAME" line is appended to ${srcname}.vcs.
svn_get_README_Debian () {
  # $getfile is left unquoted on purpose: it is a list that must be split.
  for rdfile in $getfile ; do
    svn export "svn://localhost/svn/$1/$rdfile" >/dev/null 2>/dev/null
    # Do not call this variable "file": sh functions share all variables with
    # their caller, and the caller loops over a variable of that name.
    rdname=$(basename "$rdfile")
    if [ "$rdname" = "README.Debian" ] ; then
      mv "$rdname" "$TARGETDIR/$firstletter/${srcname}.README.Debian"
    else
      rdfirstletter=$(echo "$rdname" | sed "s/^\(.\).*/\1/")
      # No source package starting with this letter may have been seen yet,
      # so the directory has to be created before moving the file there.
      mkdir -p "$TARGETDIR/$rdfirstletter"
      mv "$rdname" "$TARGETDIR/$rdfirstletter/${rdname}" || echo "No file $rdname for $pkg found (in svn_get_README_Debian)" >> "$ERRLOG"
    fi
    echo "README.Debian: $rdname" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
  done
}
# Collect the machine readable files of all packages below one Subversion
# directory.
#
# Arguments: $1 - repository path below svn://localhost/svn/ (SVNDIRS entry)
# Globals:   TARGETDIR, ERRLOG (read).  sh has no local variables, so
#            curdir, TMPLIST, PKGLIST, svndir, pkg, chlog, srcname,
#            vcslocation, firstletter, file, srcfile, destfile and getfile
#            are overwritten as a side effect.
# Outputs:   for a source package S with first letter L the files
#            $TARGETDIR/L/S.changelog, S.vcs and, where available,
#            S.control, S.copyright, S.upstream and S.edam; README.Debian
#            files are fetched by svn_get_README_Debian.  Problems are
#            appended to $ERRLOG.
# Returns:   1 if $TARGETDIR can not be entered.
svn_checkout_machine_readable () {
  curdir=$(pwd)
  # svn export writes into the current directory, so it must be the work dir.
  cd "$TARGETDIR" || { echo "Can not change to $TARGETDIR" >> "$ERRLOG" ; return 1 ; }
  # List the whole tree once; keep only the interesting file names outside of
  # tags/branches/patches and strip everything but the path (last column).
  TMPLIST=$(mktemp)
  svn list --verbose "svn://localhost/svn/$1" --recursive | \
    grep -v -e '/tags/' -e '/branches/' -e '/patches/' | \
    grep -e "/control$" -e "/changelog$" -e "/copyright$" -e "/upstream$" -e "/upstream/metadata$" -e "/upstream/.*edam$" -e "README.Debian$" | \
    grep -v "tests/control" | \
    sed 's/^.*[[:space:]]\([^[:space:]]\+\)/\1/' \
    > "$TMPLIST"
  # debug
  # cp -a $TMPLIST ~/_svnfiles
  PKGLIST=$(mktemp)
  # Regard different directory layout options
  if grep -q "/trunk/debian/" "$TMPLIST" ; then
    # for Debian Med and Debian Science layout
    svndir=$(echo "$1" | sed 's?^[^/]*/??')
    sed -e 's?/debian/.*?/?' -e 's?trunk/.*??' "$TMPLIST" | sed -e "s?^$svndir/??" -e 's+/$++' -e 's#^[^/]\+/\([^/]\+\)/.*#\1#' -e 's#^[^/]\+/\([^/]\+\)$#\1#' | sort | uniq > "$PKGLIST"
    trunklayout="/trunk"
  else
    # for DebiChem layout
    sed 's?/debian/.*??' "$TMPLIST" | sort | uniq > "$PKGLIST"
    trunklayout=""
  fi
  # set -x
  # Word splitting of the package list is intended (one name per line).
  for pkgdir in $(cat "$PKGLIST") ; do
    pkg=$(echo "$pkgdir" | sed -e 's?^.*/\([^/]\+\)?\1?')
    # Of all changelogs that may belong to $pkg take the last one in sort order.
    chlog=$(grep -e "/$pkg/[^/]\+/debian/changelog$" -e "^$pkg/trunk/debian/changelog$" -e "^$pkg/debian/changelog$" -e "/$pkg/debian/changelog$" -e "^[a-z]\+/$pkg/[a-z]\+/trunk/debian/changelog$" "$TMPLIST" | sort | tail -n 1 2>/dev/null)
    if [ "" != "$chlog" ] ; then
      # A changelog left behind by a previously skipped package must not be
      # parsed again in case this export fails.
      rm -f changelog
      svn export "svn://localhost/svn/$1/$chlog" >/dev/null
      srcname=$(dpkg-parsechangelog -lchangelog 2>/dev/null | sed -n 's/^Source: //p')
      ## DEBUG
      #echo "$pkgdir -> $pkg ($chlog) -> $srcname"
      if [ "" != "$srcname" ] ; then
        # Directory holding debian/ (keeps its trailing slash).
        vcslocation=$(echo "$chlog" | sed 's?debian/changelog$??')
        if ! echo "$vcslocation" | grep -q tags ; then # sometimes some tags end up here but these seem to be packages removed from Debian
          trunk="trunk" # at least for Debian Med and Debian Science
          firstletter=$(echo "$srcname" | sed "s/^\(.\).*/\1/")
          mkdir -p "$TARGETDIR/$firstletter"
          mv changelog "$TARGETDIR/$firstletter/${srcname}.changelog" || echo "No changelog file for $pkg found" >> "$ERRLOG"
          echo "Vcs-Svn: svn://anonscm.debian.org/$1/$vcslocation" > "$TARGETDIR/$firstletter/${srcname}.vcs"
          echo "Vcs-Browser: http://anonscm.debian.org/viewvc/$1/$vcslocation" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
          echo "Blend: $(echo "$1" | sed 's?/.*??')" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
          for file in control copyright upstream upstream/metadata upstream/edam README.Debian; do
            # srcfile:  name svn export creates in the current directory
            # destfile: suffix of the stored file (upstream/metadata is stored
            #           as "upstream", upstream/edam as "edam")
            srcfile=${file#upstream/}
            destfile=${file%/metadata}
            if echo "$destfile" | grep -q "upstream/.*edam" ; then
              destfile=$(echo "$destfile" | sed 's?upstream/??')
            fi
            if [ "$file" = "README.Debian" ] ; then
              # It is possible to have more than one debian/*.README.Debian
              getfile=$(grep -e "/$pkg/trunk/debian/.*$file$" -e "^$pkg/trunk/debian/.*$file$" -e "^$pkg/debian/.*$file$" -e "trunk/$pkg/debian/.*$file$" -e "/$pkg/[a-z]\+/trunk/debian/.*$file$" "$TMPLIST" 2>/dev/null) || true
              svn_get_README_Debian "$1"
              getfile="" # do not try to fetch file again
            else
              getfile=$(grep -e "/$pkg/trunk/debian/$file$" -e "^$pkg/trunk/debian/$file$" -e "^$pkg/debian/$file$" -e "trunk/$pkg/debian/$file$" -e "/$pkg/[a-z]\+/trunk/debian/$file$" "$TMPLIST" 2>/dev/null) || true
              # $getfile has to stay quoted below: the candidates are counted
              # and filtered line by line, and exactly one path may be handed
              # to svn export.
              if [ "$(printf '%s\n' "$getfile" | wc -l)" -gt 1 ] ; then
                # try whether the package name really matches (no idea why the above partly fails)
                getfile=$(printf '%s\n' "$getfile" | grep -w "$pkg")
                if [ "$(printf '%s\n' "$getfile" | wc -l)" -gt 1 ] ; then
                  echo "Two different locations for $getfile" >> "$ERRLOG"
                  getfile=$(printf '%s\n' "$getfile" | head -n 1)
                  echo " --->> simply choose the first one: $getfile" >> "$ERRLOG"
                else
                  echo "??? for package $getfile there was a competing name ???" >> "$ERRLOG"
                fi
              fi
            fi
            if [ "" != "$getfile" ] ; then
              svn export "svn://localhost/svn/$1/$getfile" >/dev/null 2>/dev/null
              if [ -e "$srcfile" ] ; then
                mv "$srcfile" "$TARGETDIR/$firstletter/${srcname}.$destfile" || echo "No file $srcfile for $pkg found (in second try to fetch data from svn)" >> "$ERRLOG"
              else
                echo "ERR 1: Can not obtain file ${file} of source ${srcname} of team $1 from ${getfile}" >> "$ERRLOG"
              fi
            else
              # Nothing found with the usual layouts: retry relative to the
              # changelog location unless that is a trunk / etch branch.
              if ! echo "$vcslocation" | grep -q -e trunk -e "branch.*etch" ; then
                if [ "$destfile" != "upstream" ] ; then
                  echo "Package $pkg is lacking trunk directory in vcslocation ${vcslocation}. Try to find file $file anyway." >> "$ERRLOG"
                  # NOTE(review): $vcslocation ends in "/", so the patterns
                  # below contain "//" and presumably never match a listed
                  # path - confirm before relying on this fallback.
                  if [ "$file" = "README.Debian" ] ; then
                    getfile=$(grep -e "$vcslocation/debian/.*$file$" "$TMPLIST" 2>/dev/null) || true
                    svn_get_README_Debian "$1"
                  else
                    getfile=$(grep -e "$vcslocation/debian/$file$" "$TMPLIST" 2>/dev/null) || true
                    if [ "" != "$getfile" ] ; then
                      svn export "svn://localhost/svn/$1/$getfile" >/dev/null 2>/dev/null
                      if [ -e "$srcfile" ] ; then
                        mv "$srcfile" "$TARGETDIR/$firstletter/${srcname}.$destfile" || echo "No file $srcfile for $pkg found (in third try to fetch data from svn)" >> "$ERRLOG"
                      else
                        echo "ERR 2: Can not obtain file ${file} of source ${srcname} of team $1 from ${getfile}" >> "$ERRLOG"
                      fi
                    else
                      echo "Did not found $file for package $pkg ($(grep "$pkg" "$TMPLIST" | grep "$file"))" >> "$ERRLOG"
                    fi
                  fi
                fi
              fi
            fi
          done
        fi
      else
        echo "Can not parse changelog for source name of $pkg" >> "$ERRLOG"
      fi
    else
      echo "No changelog found for $pkg" >> "$ERRLOG"
      # Keep both lists in the home directory for later inspection.
      cp -a "$TMPLIST" "${HOME}/_changelog_for_${pkg}_missing"
      cp -a "$PKGLIST" "${HOME}/_pkglist_for_${pkg}_missing"
    fi
  done
  rm "$TMPLIST" "$PKGLIST"
  cd "$curdir"
}
# Collect the machine readable files from HEAD of one Git repository.
#
# Arguments: $1 - path of the repository (used for cd and, verbatim, in the
#                 Vcs-Git / Vcs-Browser lines)
#            $2 - GITDIRS entry; the part before the first "/" is recorded
#                 as "Blend:"
# Globals:   TARGETDIR, ERRLOG (read).  sh has no local variables, so
#            curdir, TMPCHLOG, srcname, firstletter, file, destfile, target,
#            rdfile and rdfirstletter are overwritten as a side effect.
# Outputs:   for a source package S with first letter L the files
#            $TARGETDIR/L/S.changelog, S.vcs and one file per matching entry
#            below debian/; a debian/NAME.README.Debian keeps its name and is
#            stored in the directory named after its own first letter.
#            Problems are appended to $ERRLOG.
# Returns:   1 if $1 can not be entered.
git_checkout_machine_readable () {
  curdir=$(pwd)
  cd "$1" || { echo "Can not change to $1" >> "$ERRLOG" ; return 1 ; }
  TMPCHLOG=$(mktemp)
  if git show HEAD:debian/changelog 2>/dev/null > "$TMPCHLOG" ; then
    # NOTE(review): srcname is not checked for being empty; in that case the
    # files below are written with an empty prefix - confirm this is wanted.
    srcname=$(dpkg-parsechangelog -l"$TMPCHLOG" 2>/dev/null | sed -n 's/^Source: //p')
    firstletter=$(echo "$srcname" | sed "s/^\(.\).*/\1/")
    mkdir -p "$TARGETDIR/$firstletter"
    mv "$TMPCHLOG" "$TARGETDIR/$firstletter/${srcname}.changelog" || echo "No file $TMPCHLOG for $1 found (in git_checkout_machine_readable)" >> "$ERRLOG"
    echo "Vcs-Git: git://anonscm.debian.org$1" > "$TARGETDIR/$firstletter/${srcname}.vcs"
    echo "Vcs-Browser: https://anonscm.debian.org$(echo "$1" | sed 's+^/git/+/cgit/+')" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
    echo "Blend: $(echo "$2" | sed 's?/.*??')" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
    # Word splitting of the ls-tree result is intended (one path per line);
    # the sed strips the "mode blob sha" columns in front of each path.
    for file in $(git ls-tree -r HEAD debian/ 2>/dev/null | \
        grep -e "debian/control$" -e "debian/changelog$" -e "debian/copyright$" -e "debian/upstream$" -e "debian/upstream/metadata$" -e "debian/upstream/.*edam$" -e "debian/.*README.Debian$" | \
        sed 's/^[0-9]\+[[:space:]]\+blob[[:space:]]\+[0-9a-f]\+[[:space:]]\+//') ; do
      ## grep -v -e "debian/tests" -e "/usr/share/doc" -e "DEBIAN" -e "debian/components" -e "jquery" -e "debian/templates" | \
      # for very strange reasons in two cases (pkg-games/pentobi.git and pkg-octave/octave.git) the line above contains strings not starting with ^debian ... enforcing this
      case "$file" in
        debian*)
          # upstream/metadata is stored as "upstream"; the debian/ and
          # upstream/ path components are dropped from the target name.
          destfile=${file%/metadata}
          target=$TARGETDIR/$firstletter/${srcname}.$(echo "$destfile" | sed -e 's?debian/??' -e 's?upstream/??')
          if echo "$file" | grep -q README.Debian ; then
            rdfile=$(basename "$file")
            if [ "$rdfile" = "README.Debian" ] ; then
              git show "HEAD:$file" > "$target"
              echo "README.Debian: $rdfile" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
            else
              rdfirstletter=$(echo "$rdfile" | sed "s/^\(.\).*/\1/")
              # No source package starting with this letter may have been
              # seen yet; without the directory the redirection would fail.
              mkdir -p "$TARGETDIR/$rdfirstletter"
              git show "HEAD:$file" > "$TARGETDIR/$rdfirstletter/${rdfile}"
              echo "README.Debian: $rdfile" >> "$TARGETDIR/$firstletter/${srcname}.vcs"
            fi
          else
            git show "HEAD:$file" > "$target"
          fi
          ;;
      esac
    done
  else
    echo "Can not find debian/changelog in $1" >> "$ERRLOG"
  fi
  # Only still present if the changelog could not be read or moved.
  rm -f "$TMPCHLOG"
  cd "$curdir"
}
# Main part: start with an empty work directory, scan all Subversion and Git
# locations, then pack the result into the two archives.
mkdir -p "$TARGETDIR"
# ${TARGETDIR:?} aborts the script instead of expanding to "/*" should the
# variable ever be empty or unset.
rm -rf "${TARGETDIR:?}"/*

# Word splitting of both lists is intended (one entry per line).
for svndir in $SVNDIRS ; do
  echo "$svndir" >> "$ERRLOG"
  svn_checkout_machine_readable "$svndir"
done

for gitdir in $GITDIRS ; do
  echo "$gitdir" >> "$ERRLOG"
  for gitrepo in $(find "/git/$gitdir" -mindepth 1 -maxdepth 1 -type d -name "*.git") ; do
    # echo $gitrepo
    # find prints full paths, so the directory name has to be compared to
    # skip a plain ".git" directory.
    if [ "${gitrepo##*/}" != ".git" ] ; then
      case "$gitrepo" in
        *package_template*)
          echo "Ignore template $gitrepo in $gitdir" >> "$ERRLOG"
          ;;
        *)
          git_checkout_machine_readable "$gitrepo" "$gitdir"
          ;;
      esac
    fi
  done
done

# Sanity check: do not replace the published archives by a nearly empty one.
# Count inside $TARGETDIR so the check does not depend on the directory the
# script was started from.
FOUNDDIRS=$(ls "$TARGETDIR" | wc -l)
if [ "$FOUNDDIRS" -lt 10 ] ; then
  echo "Found way to less packages in $TDNAME" >> "$ERRLOG"
  exit 1
fi

rm -f "$MACHINEREADABLEARCHIVE"
# Pack relative to the parent directory so the archive members start with
# $TDNAME/.
cd "$TARGETDIR" || exit 1
cd .. || exit 1
# Remove hidden entries on the top level of the work directory before packing.
find "$TDNAME" -maxdepth 1 -mindepth 1 -name ".*" -delete
# The patterns are quoted so the shell can not expand them before tar sees them.
tar --exclude='*README.Debian' -cjf "$MACHINEREADABLEARCHIVE" "$TDNAME"
tar --exclude='*.control' --exclude='*.changelog' --exclude='*.upstream' --exclude='*.edam' --exclude='*.vcs' --exclude='*.copyright' -cjf "$READMEDEBIANARCHIVE" "$TDNAME"

# Check for remaining tempfiles
if find /tmp -maxdepth 1 -name 'tmp.*' -user "$(id -un)" 2>/dev/null | grep -q . ; then
  printf 'There are tempfiles remaining!\n\n' | mailx -s "$0 has left some remaining files in /tmp" tille@debian.org
fi
......@@ -6,6 +6,7 @@ import sys
import shutil
import base64
import subprocess
import time
BLENDSGROUPS={ 'Debian 3D Printing Team' : '3dprinter'
, 'Debian Accessibility Team' : 'pkg-a11y'
......@@ -103,6 +104,11 @@ for group in blends_groups:
# DEBUG : only few projects to be faster
# projects = group.projects.list(page=1, per_page=10, order_by='name', sort='asc')
for pr in projects:
try:
project = gl.projects.get(pr.attributes['id']) # without this extra get repository_tree() fails
except gitlab.exceptions.GitlabHttpError:
print("Warning: GitlabHttpError while trying to fetch attributes of %s. Wait one second and try again." % pr.attributes['id'])
time.sleep(1)
project = gl.projects.get(pr.attributes['id']) # without this extra get repository_tree() fails
name = project.attributes['name']
namedir = os.path.join(TARGETDIR, name[0])
......