Commits on Source (9)
3.0.19:
Check archives after download
Fix Python regexp syntax (deprecation)
3.0.18:
Rename protobuf and use specific package to avoid conflicts
3.0.17:
......
@@ -238,7 +238,7 @@ class DirectHttpDownload(DirectFTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
match = re.search('charset=(\S+)', content_type)
match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -257,7 +257,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['size'] = int(parts[1].strip())
if parts[0].strip() == 'Last-Modified':
# Sun, 06 Nov 1994
res = re.match('(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
@@ -265,7 +265,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = int(res.group(4))
continue
# Sunday, 06-Nov-94
res = re.match('(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
@@ -273,7 +273,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = 2000 + int(res.group(4))
continue
# Sun Nov 6 08:49:37 1994
res = re.match('(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
res = re.match(r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(3))
......
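The r'' prefixes added in the hunks above address Python 3.6+'s deprecation of unrecognized escape sequences (such as \S or \w) in ordinary string literals. A minimal sketch of the change, using an illustrative Content-Type value rather than anything taken from the module:

    import re

    # Before: '\S' in a plain literal triggers
    #   DeprecationWarning: invalid escape sequence '\S'
    # at compile time (visible with python -W error::DeprecationWarning).
    # pattern = 'charset=(\S+)'

    # After: a raw string hands the backslash to the regex engine untouched.
    pattern = r'charset=(\S+)'
    match = re.search(pattern, 'text/html; charset=utf-8')
    if match:
        print(match.group(1))  # -> utf-8

The regular expressions themselves are unchanged; only the string literals are marked raw so the backslashes stop being interpreted as (invalid) escape sequences.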
@@ -129,9 +129,18 @@ class FTPDownload(DownloadInterface):
error = False
except Exception as e:
self.logger.error('Could not get errcode:' + str(e))
nbtry += 1
curl.close()
fp.close()
skip_check_uncompress = os.environ.get('UNCOMPRESS_SKIP_CHECK', None)
if not error and skip_check_uncompress is None:
archive_status = Utils.archive_check(file_path)
if not archive_status:
self.logger.error('Archive is invalid or corrupted, deleting file and retrying download')
error = True
if os.path.exists(file_path):
os.remove(file_path)
return error
def download(self, local_dir, keep_dirs=True):
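Utils.archive_check() used above comes from biomaj_core and is not shown in this changeset. As a rough, self-contained illustration of what the post-download validation amounts to (the helper below is an assumption for readability, not the biomaj_core implementation), it boils down to opening the archive and walking it:

    import gzip
    import os
    import tarfile
    import zipfile

    def naive_archive_check(file_path):
        """Return False when a known archive type looks corrupted, True otherwise.

        Illustrative stand-in for Utils.archive_check() from biomaj_core.
        """
        try:
            if zipfile.is_zipfile(file_path):
                with zipfile.ZipFile(file_path) as zf:
                    return zf.testzip() is None      # testzip() names the first bad member
            if tarfile.is_tarfile(file_path):
                with tarfile.open(file_path) as tf:
                    tf.getmembers()                  # forces a full scan of the headers
                return True
            if file_path.endswith('.gz'):
                with gzip.open(file_path, 'rb') as gz:
                    while gz.read(1024 * 1024):      # decompress everything to verify the CRC
                        pass
                return True
        except Exception:
            return False
        return True                                  # not an archive: nothing to validate

    # Same recovery strategy as the FTP code above: drop the file so the retry starts clean.
    file_path = '/tmp/example.tar.gz'                # hypothetical download target
    if os.environ.get('UNCOMPRESS_SKIP_CHECK') is None and not naive_archive_check(file_path):
        if os.path.exists(file_path):
            os.remove(file_path)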
@@ -253,7 +262,7 @@ class FTPDownload(DownloadInterface):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
match = re.search('charset=(\S+)', content_type)
match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -288,7 +297,7 @@ class FTPDownload(DownloadInterface):
rfile['hash'] = hashlib.md5(line.encode('utf-8')).hexdigest()
try:
rfile['year'] = int(parts[7])
except Exception as e:
except Exception:
# specific ftp case issues at getting date info
curdate = datetime.now()
rfile['year'] = curdate.year
......
@@ -17,7 +17,7 @@ except ImportError:
class HTTPParse(object):
def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
"""
r'''
http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
http.group.dir.name: 1
@@ -25,7 +25,7 @@ class HTTPParse(object):
http.group.file.name: 1
http.group.file.date: 2
http.group.file.size: 3
"""
'''
self.dir_line = dir_line
self.file_line = file_line
self.dir_name = dir_name
@@ -85,7 +85,7 @@ class HTTPDownload(FTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
match = re.search('charset=(\S+)', content_type)
match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -96,14 +96,14 @@ class HTTPDownload(FTPDownload):
# lets get the output in a string
result = output.getvalue().decode(encoding)
'''
'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
'http.group.dir.name': 1,
'http.group.dir.date': 2,
'http.group.file.name': 1,
'http.group.file.date': 2,
'http.group.file.size': 3,
r'''
'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
'http.group.dir.name': 1,
'http.group.dir.date': 2,
'http.group.file.name': 1,
'http.group.file.date': 2,
'http.group.file.size': 3,
'''
rfiles = []
......
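The http.parse.* patterns above are applied line by line to the remote HTML index, and the http.group.* numbers map capture groups to the file name, date and size. A small sketch of that mapping, against a made-up Apache-style listing line:

    import re

    # Hypothetical directory-index line; real listings vary by server.
    line = '<a href="alu.n.gz">alu.n.gz</a>    06-Nov-2018 08:49  1.2M'

    file_line = r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})'
    match = re.search(file_line, line)
    if match:
        rfile = {
            'name': match.group(1),  # http.group.file.name: 1 -> 'alu.n.gz'
            'date': match.group(2),  # http.group.file.date: 2 -> '06-Nov-2018 08:49'
            'size': match.group(3),  # http.group.file.size: 3 -> '1.2M'
        }

Marking the surrounding comment block as a raw string keeps these backslash-heavy examples from triggering the same escape-sequence deprecation fixed elsewhere in this changeset.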
@@ -3,6 +3,7 @@ import os
from datetime import datetime
import time
from biomaj_core.utils import Utils
from biomaj_download.download.interface import DownloadInterface
from irods.session import iRODSSession
from irods.models import Collection, DataObject, User
@@ -96,6 +97,15 @@ class IRODSDownload(DownloadInterface):
rfile['download_time'] = 0
rfile['error'] = True
raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
else:
archive_status = Utils.archive_check(file_path)
if not archive_status:
self.logger.error('Archive is invalid or corrupted, deleting file')
rfile['error'] = True
if os.path.exists(file_path):
os.remove(file_path)
raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
end_time = datetime.now()
end_time = time.mktime(end_time.timetuple())
rfile['download_time'] = end_time - start_time
......
@@ -119,7 +119,7 @@ class RSYNCDownload(DownloadInterface):
rfile['save_as'] = rfile['name']
if keep_dirs:
file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
if re.match('\S*\/$', file_dir):
if re.match(r'\S*\/$', file_dir):
file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
else:
file_path = file_dir + os.path.basename(rfile['save_as'])
......
biomaj3-download (3.0.19-1) unstable; urgency=medium
* Team upload.
* New upstream version
* debhelper 12
* Standards-Version: 4.3.0
* Testsuite: autopkgtest-pkg-python
* Remove trailing whitespace in debian/changelog
* Remove trailing whitespace in debian/copyright
-- Andreas Tille <tille@debian.org> Wed, 30 Jan 2019 10:40:31 +0100
biomaj3-download (3.0.18-1) unstable; urgency=medium
[ Jelmer Vernooij ]
......
Source: biomaj3-download
Section: python
Priority: optional
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Olivier Sallou <osallou@debian.org>
Build-Depends: debhelper (>= 9), dh-python,
Section: python
Testsuite: autopkgtest-pkg-python
Priority: optional
Build-Depends: debhelper (>= 12~),
dh-python,
protobuf-compiler,
python3-all,
python3-bcrypt,
@@ -23,17 +25,20 @@ Build-Depends: debhelper (>= 9), dh-python,
python3-yaml,
python3-biomaj3-core,
python3-biomaj3-zipkin
Standards-Version: 4.1.3
Homepage: https://github.com/genouest/biomaj-download
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/biomaj3-download
Vcs-Git: https://salsa.debian.org/med-team/biomaj3-download.git
Homepage: https://github.com/genouest/biomaj-download
Package: python3-biomaj3-download
Architecture: all
Depends: ${misc:Depends}, ${python3:Depends}
Depends: ${misc:Depends},
${python3:Depends}
Recommends: ${python3:Recommends}
Suggests: ${python3:Suggests}, python3-gunicorn, mongodb, redis-server
XB-Python-Egg-Name: biomaj-download
Suggests: ${python3:Suggests},
python3-gunicorn,
mongodb,
redis-server
Description: BioMAJ download management library
BioMAJ downloads remote data banks, checks their status and applies
transformation workflows, with consistent state, to provide ready-to-use
@@ -45,3 +50,4 @@ Description: BioMAJ download management library
.
This package contains the library and microservice to manage downloads
in BioMAJ3
XB-Python-Egg-Name: biomaj-download
@@ -21,7 +21,7 @@ config = {
'url': 'http://biomaj.genouest.org',
'download_url': 'http://biomaj.genouest.org',
'author_email': 'olivier.sallou@irisa.fr',
'version': '3.0.18',
'version': '3.0.19',
'classifiers': [
# How mature is this project? Common values are
# 3 - Alpha
@@ -46,7 +46,7 @@ config = {
'biomaj_zipkin',
'pycurl',
'py-bcrypt',
'pika',
'pika==0.11.2',
'redis',
'PyYAML',
'flask',
......
@@ -443,9 +443,26 @@ class TestBiomajFTPDownload(unittest.TestCase):
(file_list, dir_list) = ftpd.list()
# ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
try:
ftpd.download(self.utils.data_dir)
except Exception:
self.assertTrue(1==1)
else:
self.assertTrue(1==0)
ftpd.close()
# self.assertTrue(len(ftpd.files_to_download) == 2)
def test_download_skip_uncompress_checks(self):
# ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/db/FASTA/')
os.environ['UNCOMPRESS_SKIP_CHECK'] = "1"
ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
(file_list, dir_list) = ftpd.list()
# ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
ftpd.download(self.utils.data_dir)
ftpd.close()
self.assertTrue(len(ftpd.files_to_download) == 2)
del os.environ['UNCOMPRESS_SKIP_CHECK']
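As the test above exercises, the new archive validation can be bypassed with the UNCOMPRESS_SKIP_CHECK environment variable; the FTP code only checks whether the variable is set, not what it contains. A minimal usage sketch (the import path is inferred from the interface import shown earlier, and /tmp stands in for a real offline directory):

    import os
    from biomaj_download.download.ftp import FTPDownload

    os.environ['UNCOMPRESS_SKIP_CHECK'] = '1'        # any value disables the check
    try:
        ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
        (file_list, dir_list) = ftpd.list()
        ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
        ftpd.download('/tmp')                        # files are kept even if archive_check would fail
        ftpd.close()
    finally:
        del os.environ['UNCOMPRESS_SKIP_CHECK']      # restore default validation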
def test_download_in_subdir(self):
ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/')
@@ -657,4 +674,3 @@ class TestBiomajIRODSDownload(unittest.TestCase):
irodsd.set_offline_dir(self.utils.data_dir)
(files_list, dir_list) = irodsd.list()
self.assertTrue(len(files_list) != 0)