Commits on Source (9)
3.0.19:
Check archives after download
Fix Python regexp syntax (deprecation)
3.0.18:
Rename protobuf and use specific package to avoid conflicts
3.0.17:
......
......@@ -238,7 +238,7 @@ class DirectHttpDownload(DirectFTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
-match = re.search('charset=(\S+)', content_type)
+match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
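A note on the recurring one-line fixes in this release: since Python 3.6, an unrecognised escape such as \S inside a plain string literal emits a DeprecationWarning at compile time, so every regexp literal gains an r prefix. A minimal sketch of the fixed form, using a hypothetical Content-Type value:

import re

# Hypothetical header value for illustration.
content_type = 'text/html; charset=utf-8'

# The raw-string literal compiles without the DeprecationWarning that
# Python 3.6+ emits for '\S' in a plain string; matching is identical.
match = re.search(r'charset=(\S+)', content_type)
if match:
    print(match.group(1))  # -> utf-8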
......@@ -257,7 +257,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['size'] = int(parts[1].strip())
if parts[0].strip() == 'Last-Modified':
# Sun, 06 Nov 1994
-res = re.match('(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
+res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
......@@ -265,7 +265,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = int(res.group(4))
continue
# Sunday, 06-Nov-94
-res = re.match('(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
+res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
......@@ -273,7 +273,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = 2000 + int(res.group(4))
continue
# Sun Nov 6 08:49:37 1994
-res = re.match('(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
+res = re.match(r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(3))
......
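The three patterns above cover the three date layouts an HTTP server may use in a Last-Modified header (RFC 1123, RFC 850 and C asctime). A quick sketch, checking each pattern against the sample date from its comment:

import re

# Sample values copied from the comments in the hunk above.
samples = [
    ('Sun, 06 Nov 1994', r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)'),
    ('Sunday, 06-Nov-94', r'(\w+),\s+(\d+)-(\w+)-(\d+)'),
    ('Sun Nov  6 08:49:37 1994',
     r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)'),
]
for value, pattern in samples:
    print(re.match(pattern, value).groups())
# ('Sun', '06', 'Nov', '1994')
# ('Sunday', '06', 'Nov', '94')
# ('Sun', 'Nov', '6', '1994')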
......@@ -129,9 +129,18 @@ class FTPDownload(DownloadInterface):
error = False
except Exception as e:
self.logger.error('Could not get errcode:' + str(e))
nbtry += 1
curl.close()
fp.close()
+skip_check_uncompress = os.environ.get('UNCOMPRESS_SKIP_CHECK', None)
+if not error and skip_check_uncompress is None:
+    archive_status = Utils.archive_check(file_path)
+    if not archive_status:
+        self.logger.error('Archive is invalid or corrupted, deleting file and retrying download')
+        error = True
+        if os.path.exists(file_path):
+            os.remove(file_path)
return error
def download(self, local_dir, keep_dirs=True):
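Condensed, the verification added above (and mirrored in the iRODS downloader below) has this shape; it assumes Utils.archive_check() from biomaj_core returns a falsy value for an unreadable or corrupted archive, with UNCOMPRESS_SKIP_CHECK as the opt-out the new test exercises:

import os

from biomaj_core.utils import Utils  # provides archive_check()

def post_download_check(file_path, error, logger):
    # Sketch only: returns True (error) when the archive fails verification.
    if error or os.environ.get('UNCOMPRESS_SKIP_CHECK') is not None:
        return error
    if not Utils.archive_check(file_path):
        logger.error('Archive is invalid or corrupted, deleting file and retrying download')
        if os.path.exists(file_path):
            os.remove(file_path)
        return True
    return error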
......@@ -253,7 +262,7 @@ class FTPDownload(DownloadInterface):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
-match = re.search('charset=(\S+)', content_type)
+match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
......@@ -288,7 +297,7 @@ class FTPDownload(DownloadInterface):
rfile['hash'] = hashlib.md5(line.encode('utf-8')).hexdigest()
try:
rfile['year'] = int(parts[7])
-except Exception as e:
+except Exception:
# specific ftp case issues at getting date info
curdate = datetime.now()
rfile['year'] = curdate.year
......
......@@ -17,7 +17,7 @@ except ImportError:
class HTTPParse(object):
def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
"""
r'''
http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
http.group.dir.name: 1
......@@ -25,7 +25,7 @@ class HTTPParse(object):
http.group.file.name: 1
http.group.file.date: 2
http.group.file.size: 3
"""
'''
self.dir_line = dir_line
self.file_line = file_line
self.dir_name = dir_name
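The docstring patterns above are the configurable regexps HTTPParse applies line by line to an HTML index page. A sketch against a hypothetical Apache "fancy index" line, using the file pattern as documented (group 1 = name, 2 = date, 3 = size):

import re

# Hypothetical listing line in Apache "fancy index" style.
html = ('<img src="/icons/compressed.gif" alt="[   ]" /> '
        '<a href="alu.n.gz">alu.n.gz</a>    06-Nov-2018 08:49  1.2M')

file_line = (r'<img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]'
             r'<a[\s]+href="([\S]+)".*'
             r'([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})'
             r'[\s]+([\d\.]+[MKG]{0,1})')

match = re.search(file_line, html)
if match:
    print(match.group(1), match.group(2), match.group(3))
    # -> alu.n.gz 06-Nov-2018 08:49 1.2M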
......@@ -85,7 +85,7 @@ class HTTPDownload(FTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
-match = re.search('charset=(\S+)', content_type)
+match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
......@@ -96,14 +96,14 @@ class HTTPDownload(FTPDownload):
# lets get the output in a string
result = output.getvalue().decode(encoding)
-'''
-'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
-'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
-'http.group.dir.name': 1,
-'http.group.dir.date': 2,
-'http.group.file.name': 1,
-'http.group.file.date': 2,
-'http.group.file.size': 3,
-'''
+r'''
+http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
+http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
+http.group.dir.name': 1,
+http.group.dir.date': 2,
+http.group.file.name': 1,
+http.group.file.date': 2,
+http.group.file.size': 3,
+'''
rfiles = []
......
......@@ -3,6 +3,7 @@ import os
from datetime import datetime
import time
+from biomaj_core.utils import Utils
from biomaj_download.download.interface import DownloadInterface
from irods.session import iRODSSession
from irods.models import Collection, DataObject, User
......@@ -96,6 +97,15 @@ class IRODSDownload(DownloadInterface):
rfile['download_time'] = 0
rfile['error'] = True
raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
+else:
+    archive_status = Utils.archive_check(file_path)
+    if not archive_status:
+        self.logger.error('Archive is invalid or corrupted, deleting file')
+        rfile['error'] = True
+        if os.path.exists(file_path):
+            os.remove(file_path)
+        raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
end_time = datetime.now()
end_time = time.mktime(end_time.timetuple())
rfile['download_time'] = end_time - start_time
......
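The iRODS variant applies the same archive verification but fails fast instead of retrying: the corrupt file is removed and the exception propagates to the caller. In outline, under the names used in the hunk above:

import os

def check_irods_download(file_path, rfile, logger, archive_check):
    # Sketch of the fail-fast branch added above.
    if not archive_check(file_path):
        logger.error('Archive is invalid or corrupted, deleting file')
        rfile['error'] = True
        if os.path.exists(file_path):
            os.remove(file_path)
        raise Exception(
            "IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])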
......@@ -119,7 +119,7 @@ class RSYNCDownload(DownloadInterface):
rfile['save_as'] = rfile['name']
if keep_dirs:
file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
-if re.match('\S*\/$', file_dir):
+if re.match(r'\S*\/$', file_dir):
file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
else:
file_path = file_dir + os.path.basename(rfile['save_as'])
......
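The rsync change is another raw-string conversion; the pattern itself only tests whether file_dir already ends in a slash before the basename is appended. A simplified sketch (made-up paths), with the os.path.join equivalent for comparison:

import os
import re

for file_dir in ('/offline/bank/sub/', '/offline/bank/sub'):
    if re.match(r'\S*\/$', file_dir):  # trailing slash already present
        file_path = file_dir + 'file.txt'
    else:
        file_path = file_dir + '/' + 'file.txt'
    # os.path.join collapses both branches into one expression.
    assert file_path == os.path.join(file_dir, 'file.txt')
    print(file_path)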
+biomaj3-download (3.0.19-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * debhelper 12
+  * Standards-Version: 4.3.0
+  * Testsuite: autopkgtest-pkg-python
+  * Remove trailing whitespace in debian/changelog
+  * Remove trailing whitespace in debian/copyright
+
+ -- Andreas Tille <tille@debian.org>  Wed, 30 Jan 2019 10:40:31 +0100
+
biomaj3-download (3.0.18-1) unstable; urgency=medium
[ Jelmer Vernooij ]
......@@ -5,7 +17,7 @@ biomaj3-download (3.0.18-1) unstable; urgency=medium
* Trim trailing whitespace.
[ Olivier Sallou ]
-  * New upstream release 
+  * New upstream release
-- Olivier Sallou <osallou@debian.org> Thu, 25 Oct 2018 08:52:07 +0000
......
Source: biomaj3-download
-Section: python
-Priority: optional
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Olivier Sallou <osallou@debian.org>
-Build-Depends: debhelper (>= 9), dh-python,
+Section: python
+Testsuite: autopkgtest-pkg-python
+Priority: optional
+Build-Depends: debhelper (>= 12~),
+ dh-python,
protobuf-compiler,
python3-all,
python3-bcrypt,
......@@ -23,17 +25,20 @@ Build-Depends: debhelper (>= 9), dh-python,
python3-yaml,
python3-biomaj3-core,
python3-biomaj3-zipkin
-Standards-Version: 4.1.3
-Homepage: https://github.com/genouest/biomaj-download
+Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/biomaj3-download
Vcs-Git: https://salsa.debian.org/med-team/biomaj3-download.git
+Homepage: https://github.com/genouest/biomaj-download
Package: python3-biomaj3-download
Architecture: all
-Depends: ${misc:Depends}, ${python3:Depends}
+Depends: ${misc:Depends},
+ ${python3:Depends}
Recommends: ${python3:Recommends}
-Suggests: ${python3:Suggests}, python3-gunicorn, mongodb, redis-server
-XB-Python-Egg-Name: biomaj-download
+Suggests: ${python3:Suggests},
+ python3-gunicorn,
+ mongodb,
+ redis-server
Description: BioMAJ download management library
BioMAJ downloads remote data banks, checks their status and applies
transformation workflows, with consistent state, to provide ready-to-use
......@@ -45,3 +50,4 @@ Description: BioMAJ download management library
.
This package contains the library and microservice to manage downloads
in BioMAJ3
+XB-Python-Egg-Name: biomaj-download
......@@ -424,7 +424,7 @@ License: AGPL-3
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that 
+received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
.
......@@ -537,7 +537,7 @@ License: AGPL-3
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
-. 
+.
12. No Surrender of Others' Freedom.
.
If conditions are imposed on you (whether by court order, agreement or
......
......@@ -21,7 +21,7 @@ config = {
'url': 'http://biomaj.genouest.org',
'download_url': 'http://biomaj.genouest.org',
'author_email': 'olivier.sallou@irisa.fr',
-'version': '3.0.18',
+'version': '3.0.19',
'classifiers': [
# How mature is this project? Common values are
# 3 - Alpha
......@@ -46,7 +46,7 @@ config = {
'biomaj_zipkin',
'pycurl',
'py-bcrypt',
-'pika',
+'pika==0.11.2',
'redis',
'PyYAML',
'flask',
......
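The pika pin presumably guards against backwards-incompatible API changes in later pika releases; a quick way to confirm the installed version matches the pin (pkg_resources ships with setuptools):

# Expected output in a correctly pinned environment: 0.11.2
import pkg_resources

print(pkg_resources.get_distribution('pika').version)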
......@@ -443,9 +443,26 @@ class TestBiomajFTPDownload(unittest.TestCase):
(file_list, dir_list) = ftpd.list()
# ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
+try:
+    ftpd.download(self.utils.data_dir)
+except Exception:
+    self.assertTrue(1==1)
+else:
+    self.assertTrue(1==0)
ftpd.close()
+# self.assertTrue(len(ftpd.files_to_download) == 2)
+def test_download_skip_uncompress_checks(self):
+    # ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/db/FASTA/')
+    os.environ['UNCOMPRESS_SKIP_CHECK'] = "1"
+    ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
+    (file_list, dir_list) = ftpd.list()
+    # ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
+    ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
+    ftpd.download(self.utils.data_dir)
+    ftpd.close()
+    self.assertTrue(len(ftpd.files_to_download) == 2)
+    del os.environ['UNCOMPRESS_SKIP_CHECK']
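The new test drives the opt-out end to end: with UNCOMPRESS_SKIP_CHECK set, the 1KB.zip test files from speedtest.tele2.net (dummy payloads, so not valid archives) download without tripping the archive check. The same toggle works outside the test suite:

import os

# Disable post-download archive verification for one session, then
# restore it; mirrors what the test above does around ftpd.download().
os.environ['UNCOMPRESS_SKIP_CHECK'] = '1'
try:
    pass  # ... run the downloader here ...
finally:
    del os.environ['UNCOMPRESS_SKIP_CHECK']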
def test_download_in_subdir(self):
ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/')
......@@ -577,7 +594,7 @@ class iRodsResult(object):
elif "COLL_NAME" in str(index):
return self.Collname
elif "D_OWNER_NAME" in str(index):
-return self.Dataowner_name 
+return self.Dataowner_name
class MockiRODSSession(object):
......@@ -601,19 +618,19 @@ class MockiRODSSession(object):
return self.Collid
if "COLL_NAME" in str(index):
return self.Collname
def configure(self):
return MockiRODSSession()
def query(self,Collname, Dataname, Datasize, Dataowner_name, Datamodify_time):
return self
def all(self):
return self
def one(self):
return self
def filter(self,boo):
return self
......@@ -640,7 +657,7 @@ class TestBiomajIRODSDownload(unittest.TestCase):
self.curdir = os.path.dirname(os.path.realpath(__file__))
self.examples = os.path.join(self.curdir,'bank') + '/'
BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
def tearDown(self):
self.utils.clean()
......@@ -657,4 +674,3 @@ class TestBiomajIRODSDownload(unittest.TestCase):
irodsd.set_offline_dir(self.utils.data_dir)
(files_list, dir_list) = irodsd.list()
self.assertTrue(len(files_list) != 0)