Commit e340cbf5 authored by Andreas Tille

Store JSON file with projects that were read in current run. This will help...

Store a JSON file listing the projects that were read in the current run. This will help to spot removed repositories and to remove them from the gathered data as well.
parent 2f72780a
......@@ -9,6 +9,7 @@ import subprocess
import time
from dateutil import parser
import re
import json
BLENDSGROUPS={ '3dprinter' : '3dprinting-team'
, 'pkg-a11y' : 'a11y-team'
......@@ -64,9 +65,13 @@ upstreammetadata = [ 'edam',
]
TDNAME='machine-readable'
MACHINEREADABLEARCHIVE='/srv/blends.debian.org/www/_'+TDNAME+'/'+TDNAME+'.tar.xz'
READMEDEBIANARCHIVE='/srv/blends.debian.org/www/_'+TDNAME+'/README.Debian.tar.xz'
MACHINEREADABLEDIR='/srv/blends.debian.org/www/_'+TDNAME
MACHINEREADABLEARCHIVE=os.path.join(MACHINEREADABLEDIR, TDNAME+'.tar.xz')
READMEDEBIANARCHIVE=os.path.join(MACHINEREADABLEDIR, 'README.Debian.tar.xz')
TARGETDIR=os.path.join(os.environ['HOME'],TDNAME)
JSONPATH=os.path.join(TARGETDIR,'.data')
JSON=os.path.join(JSONPATH, TDNAME+'_data.json')
jsondata=[] # project, source, blend, path, status
SLEEPTIMEFILES=5 # Wait 5 seconds until next file is fetched
SLEEPTIMEPROJECTS=60 # Wait 60 seconds until next group is queried for all projects
......@@ -162,14 +167,22 @@ for blend, gpath in sorted(BLENDSGROUPS.items()):
valid_group_names.extend(valid_subgroup_names)
projects.extend(sprojects)
for project in projects:
jdata = {}
name = project.name
jdata['blend'] = blend
jdata['project'] = name
jdata['path'] = project.path
# bnd was parsed twice in 'Debian Java Maintainers'
if project.namespace['name'] not in valid_group_names :
print("Project %s is in group '%s' and thus not in the group '%s' which is parsed here -> ignored" % (name, project.namespace['name'], group.name))
ignored += 1
jdata['status'] = 'ignore'
jsondata.append(jdata)
continue
if name in BLACKLIST or name.endswith('.pages.debian.net'):
ignored += 1
jdata['status'] = 'ignore'
jsondata.append(jdata)
if debug > 0:
print("Ignore project %s of blend %s." % (name, blend))
continue
......@@ -187,11 +200,14 @@ for blend, gpath in sorted(BLENDSGROUPS.items()):
stored_team = re.sub('Vcs-Browser: https://salsa.debian.org/([^/]+)/.*','\\1',line.strip())
if stored_team != gpath:
print("Currently team %s is parsed but project %s was previously found and stored in team %s" % (gpath, name, stored_team))
if line.startswith('Source: '):
jdata['source'] = line[len('Source: '):].strip()
if line.startswith(LAST_ACTIVITY):
last_activity_at = line[LAST_ACTIVITY_LENGTH:].strip()
if last_activity_at == project.last_activity_at:
dosomething=False
unchanged += 1
jdata['status'] = 'unchanged'
if debug > 1:
print("Do nothing in %s since found last_activity_at %s == repository %s" % (name, last_activity_at, project.last_activity_at))
else:
......@@ -200,24 +216,22 @@ for blend, gpath in sorted(BLENDSGROUPS.items()):
if ( last_activity_project < last_activity_recorded ):
print("For some strange reason the repository of %s has an older timestamp (%s) than it was recorded before (%s). Do not parse again!" % (name, project.last_activity_at, last_activity_at))
dosomething=False
jdata['status'] = 'unchanged'
unchanged += 1
if debug > 0:
print("Continue with %s since found last_activity_at %s != repository %s" % (name, last_activity_at, project.last_activity_at))
jdata['status'] = 'changed'
changed += 1
invcs.close()
except FileNotFoundError:
jdata['status'] = 'new'
new += 1
pass
if not dosomething:
jsondata.append(jdata)
continue
if debug > 1:
print("Writing %s." % os.path.join(namedir,name+'.vcs'))
with open(os.path.join(namedir,name+'.vcs'), 'w') as out:
out.write("Vcs-Browser: %s\n" % (project.web_url))
out.write("Vcs-Git: %s.git\n" % (project.web_url))
out.write("Blend: %s\n" % blend)
out.write("%s%s\n" % (LAST_ACTIVITY, project.last_activity_at))
out.close()
# print(name)
# be friendly to Salsa and add some delay time before real reading of data
# see thread around https://lists.debian.org/debian-devel/2018/07/msg00125.html
......@@ -228,17 +242,36 @@ for blend, gpath in sorted(BLENDSGROUPS.items()):
output_metadata(pr, 'debian/upstream', upstreammetadata)
try:
with open(os.path.join(namedir,name+'.changelog'), 'r') as chlog:
chlogname = chlog.readline().strip().split(' ')[0]
if chlogname != name:
print("Warning: package name of project found at %s/%s is different than dir: %s" % (gpath, name, chlogname))
source = chlog.readline().strip().split(' ')[0]
if source != name:
print("Warning: package name of project found at %s/%s is different than dir: %s" % (gpath, name, source))
except FileNotFoundError:
pass # If no changelog file was created this project is irrelevant anyway
with open(os.path.join(namedir,name+'.vcs'), 'w') as out:
out.write("Vcs-Browser: %s\n" % (project.web_url))
out.write("Vcs-Git: %s.git\n" % (project.web_url))
out.write("Blend: %s\n" % blend)
if source:
out.write("Source: %s\n" % source)
jdata['source'] = source
out.write("%s%s\n" % (LAST_ACTIVITY, project.last_activity_at))
out.close()
jsondata.append(jdata)
print("In %s %i repositories changed, %i remained unchanged, %i were new and %i were ignored." % (blend, changed, unchanged, new, ignored))
time.sleep(SLEEPTIMEPROJECTS)
# os.system("tar --exclude=*README.Debian -caf %s %s" % (MACHINEREADABLEARCHIVE, TARGETDIR))
p = subprocess.Popen(['tar', '--exclude=*README.Debian', '-caf', MACHINEREADABLEARCHIVE, TDNAME], cwd=os.environ['HOME'])
try:
os.makedirs(MACHINEREADABLEDIR)
except:
pass
p = subprocess.Popen(['tar', '--exclude=*README.Debian', '--exclude='+JSON, '-caf', MACHINEREADABLEARCHIVE, TDNAME], cwd=os.environ['HOME'])
p.wait()
# os.system("tar --exclude=*.control --exclude=*.changelog --exclude=*.upstream --exclude=*.edam --exclude=*.vcs --exclude=*.copyright -caf %s %s" % (READMEDEBIANARCHIVE, TARGETDIR))
p = subprocess.Popen(['tar', '--exclude=*.control', '--exclude=*.changelog', '--exclude=*.upstream', '--exclude=*.edam', '--exclude=*.vcs', '--exclude=*.copyright', '-caf', READMEDEBIANARCHIVE, TDNAME], cwd=os.environ['HOME'])
p = subprocess.Popen(['tar', '--exclude=*.control', '--exclude=*.changelog', '--exclude=*.upstream', '--exclude=*.edam', '--exclude=*.vcs', '--exclude=*.copyright', '--exclude='+JSON, '-caf', READMEDEBIANARCHIVE, TDNAME], cwd=os.environ['HOME'])
p.wait()
try:
os.makedirs(JSONPATH)
except:
pass
with open(JSON, 'w') as outfile:
json.dump(jsondata, outfile)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment