Commits on Source (9)
3.0.19:
Check archives after download
Fix Python regexp syntax (deprecation)
3.0.18:
Rename protobuf and use specific package to avoid conflicts
3.0.17:
......
@@ -238,7 +238,7 @@ class DirectHttpDownload(DirectFTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
match = re.search('charset=(\S+)', content_type)
match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -257,7 +257,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['size'] = int(parts[1].strip())
if parts[0].strip() == 'Last-Modified':
# Sun, 06 Nov 1994
res = re.match('(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
@@ -265,7 +265,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = int(res.group(4))
continue
# Sunday, 06-Nov-94
res = re.match('(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
@@ -273,7 +273,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = 2000 + int(res.group(4))
continue
# Sun Nov 6 08:49:37 1994
res = re.match('(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
res = re.match(r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(3))
......
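The r'' prefixes added in the hunks above address Python 3.6+'s deprecation of unrecognized escape sequences (such as \S or \w) in ordinary string literals. A minimal sketch of the change, using an illustrative Content-Type value rather than anything taken from the module:

    import re

    # Before: '\S' in a plain literal triggers
    #   DeprecationWarning: invalid escape sequence '\S'
    # at compile time (visible with python -W error::DeprecationWarning).
    # pattern = 'charset=(\S+)'

    # After: a raw string hands the backslash to the regex engine untouched.
    pattern = r'charset=(\S+)'
    match = re.search(pattern, 'text/html; charset=utf-8')
    if match:
        print(match.group(1))  # -> utf-8

The regular expressions themselves are unchanged; only the string literals are marked raw so the backslashes stop being interpreted as (invalid) escape sequences.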
@@ -129,9 +129,18 @@ class FTPDownload(DownloadInterface):
error = False
except Exception as e:
self.logger.error('Could not get errcode:' + str(e))
nbtry += 1
curl.close()
fp.close()
skip_check_uncompress = os.environ.get('UNCOMPRESS_SKIP_CHECK', None)
if not error and skip_check_uncompress is None:
archive_status = Utils.archive_check(file_path)
if not archive_status:
self.logger.error('Archive is invalid or corrupted, deleting file and retrying download')
error = True
if os.path.exists(file_path):
os.remove(file_path)
return error
def download(self, local_dir, keep_dirs=True):
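Utils.archive_check() used above comes from biomaj_core and is not shown in this changeset. As a rough, self-contained illustration of what the post-download validation amounts to (the helper below is an assumption for readability, not the biomaj_core implementation), it boils down to opening the archive and walking it:

    import gzip
    import os
    import tarfile
    import zipfile

    def naive_archive_check(file_path):
        """Return False when a known archive type looks corrupted, True otherwise.

        Illustrative stand-in for Utils.archive_check() from biomaj_core.
        """
        try:
            if zipfile.is_zipfile(file_path):
                with zipfile.ZipFile(file_path) as zf:
                    return zf.testzip() is None      # testzip() names the first bad member
            if tarfile.is_tarfile(file_path):
                with tarfile.open(file_path) as tf:
                    tf.getmembers()                  # forces a full scan of the headers
                return True
            if file_path.endswith('.gz'):
                with gzip.open(file_path, 'rb') as gz:
                    while gz.read(1024 * 1024):      # decompress everything to verify the CRC
                        pass
                return True
        except Exception:
            return False
        return True                                  # not an archive: nothing to validate

    # Same recovery strategy as the FTP code above: drop the file so the retry starts clean.
    file_path = '/tmp/example.tar.gz'                # hypothetical download target
    if os.environ.get('UNCOMPRESS_SKIP_CHECK') is None and not naive_archive_check(file_path):
        if os.path.exists(file_path):
            os.remove(file_path)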
@@ -253,7 +262,7 @@ class FTPDownload(DownloadInterface):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
match = re.search('charset=(\S+)', content_type)
match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -288,7 +297,7 @@ class FTPDownload(DownloadInterface):
rfile['hash'] = hashlib.md5(line.encode('utf-8')).hexdigest()
try:
rfile['year'] = int(parts[7])
except Exception as e:
except Exception:
# specific ftp case issues at getting date info
curdate = datetime.now()
rfile['year'] = curdate.year
......
@@ -17,7 +17,7 @@ except ImportError:
class HTTPParse(object):
def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
"""
r'''
http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
http.group.dir.name: 1
@@ -25,7 +25,7 @@ class HTTPParse(object):
http.group.file.name: 1
http.group.file.date: 2
http.group.file.size: 3
"""
'''
self.dir_line = dir_line
self.file_line = file_line
self.dir_name = dir_name
@@ -85,7 +85,7 @@ class HTTPDownload(FTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
match = re.search('charset=(\S+)', content_type)
match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -96,14 +96,14 @@ class HTTPDownload(FTPDownload):
# lets get the output in a string
result = output.getvalue().decode(encoding)
'''
'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
'http.group.dir.name': 1,
'http.group.dir.date': 2,
'http.group.file.name': 1,
'http.group.file.date': 2,
'http.group.file.size': 3,
r'''
'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
'http.group.dir.name': 1,
'http.group.dir.date': 2,
'http.group.file.name': 1,
'http.group.file.date': 2,
'http.group.file.size': 3,
'''
rfiles = []
......
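The http.parse.* patterns above are applied line by line to the remote HTML index, and the http.group.* numbers map capture groups to the file name, date and size. A small sketch of that mapping, against a made-up Apache-style listing line:

    import re

    # Hypothetical directory-index line; real listings vary by server.
    line = '<a href="alu.n.gz">alu.n.gz</a>    06-Nov-2018 08:49  1.2M'

    file_line = r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})'
    match = re.search(file_line, line)
    if match:
        rfile = {
            'name': match.group(1),  # http.group.file.name: 1 -> 'alu.n.gz'
            'date': match.group(2),  # http.group.file.date: 2 -> '06-Nov-2018 08:49'
            'size': match.group(3),  # http.group.file.size: 3 -> '1.2M'
        }

Marking the surrounding comment block as a raw string keeps these backslash-heavy examples from triggering the same escape-sequence deprecation fixed elsewhere in this changeset.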
@@ -3,6 +3,7 @@ import os
from datetime import datetime
import time
from biomaj_core.utils import Utils
from biomaj_download.download.interface import DownloadInterface
from irods.session import iRODSSession
from irods.models import Collection, DataObject, User
@@ -96,6 +97,15 @@ class IRODSDownload(DownloadInterface):
rfile['download_time'] = 0
rfile['error'] = True
raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
else:
archive_status = Utils.archive_check(file_path)
if not archive_status:
self.logger.error('Archive is invalid or corrupted, deleting file')
rfile['error'] = True
if os.path.exists(file_path):
os.remove(file_path)
raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
end_time = datetime.now()
end_time = time.mktime(end_time.timetuple())
rfile['download_time'] = end_time - start_time
......
@@ -119,7 +119,7 @@ class RSYNCDownload(DownloadInterface):
rfile['save_as'] = rfile['name']
if keep_dirs:
file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
if re.match('\S*\/$', file_dir):
if re.match(r'\S*\/$', file_dir):
file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
else:
file_path = file_dir + os.path.basename(rfile['save_as'])
......
biomaj3-download (3.0.19-1) unstable; urgency=medium
* Team upload.
* New upstream version
* debhelper 12
* Standards-Version: 4.3.0
* Testsuite: autopkgtest-pkg-python
* Remove trailing whitespace in debian/changelog
* Remove trailing whitespace in debian/copyright
-- Andreas Tille <tille@debian.org> Wed, 30 Jan 2019 10:40:31 +0100
biomaj3-download (3.0.18-1) unstable; urgency=medium
[ Jelmer Vernooij ]
......
Source: biomaj3-download
Section: python
Priority: optional
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Olivier Sallou <osallou@debian.org>
Build-Depends: debhelper (>= 9), dh-python,
Section: python
Testsuite: autopkgtest-pkg-python
Priority: optional
Build-Depends: debhelper (>= 12~),
dh-python,
protobuf-compiler,
python3-all,
python3-bcrypt,
@@ -23,17 +25,20 @@ Build-Depends: debhelper (>= 9), dh-python,
python3-yaml,
python3-biomaj3-core,
python3-biomaj3-zipkin
Standards-Version: 4.1.3
Homepage: https://github.com/genouest/biomaj-download
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/biomaj3-download
Vcs-Git: https://salsa.debian.org/med-team/biomaj3-download.git
Homepage: https://github.com/genouest/biomaj-download
Package: python3-biomaj3-download
Architecture: all
Depends: ${misc:Depends}, ${python3:Depends}
Depends: ${misc:Depends},
${python3:Depends}
Recommends: ${python3:Recommends}
Suggests: ${python3:Suggests}, python3-gunicorn, mongodb, redis-server
XB-Python-Egg-Name: biomaj-download
Suggests: ${python3:Suggests},
python3-gunicorn,
mongodb,
redis-server
Description: BioMAJ download management library
BioMAJ downloads remote data banks, checks their status and applies
transformation workflows, with consistent state, to provide ready-to-use
@@ -45,3 +50,4 @@ Description: BioMAJ download management library
.
This package contains the library and microservice to manage downloads
in BioMAJ3
XB-Python-Egg-Name: biomaj-download
@@ -21,7 +21,7 @@ config = {
'url': 'http://biomaj.genouest.org',
'download_url': 'http://biomaj.genouest.org',
'author_email': 'olivier.sallou@irisa.fr',
'version': '3.0.18',
'version': '3.0.19',
'classifiers': [
# How mature is this project? Common values are
# 3 - Alpha
@@ -46,7 +46,7 @@ config = {
'biomaj_zipkin',
'pycurl',
'py-bcrypt',
'pika',
'pika==0.11.2',
'redis',
'PyYAML',
'flask',
......
@@ -443,9 +443,26 @@ class TestBiomajFTPDownload(unittest.TestCase):
(file_list, dir_list) = ftpd.list()
# ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
try:
ftpd.download(self.utils.data_dir)
except Exception:
self.assertTrue(1==1)
else:
self.assertTrue(1==0)
ftpd.close()
# self.assertTrue(len(ftpd.files_to_download) == 2)
def test_download_skip_uncompress_checks(self):
# ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/db/FASTA/')
os.environ['UNCOMPRESS_SKIP_CHECK'] = "1"
ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
(file_list, dir_list) = ftpd.list()
# ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
ftpd.download(self.utils.data_dir)
ftpd.close()
self.assertTrue(len(ftpd.files_to_download) == 2)
del os.environ['UNCOMPRESS_SKIP_CHECK']
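As the test above exercises, the new archive validation can be bypassed with the UNCOMPRESS_SKIP_CHECK environment variable; the FTP code only checks whether the variable is set, not what it contains. A minimal usage sketch (the import path is inferred from the interface import shown earlier, and /tmp stands in for a real offline directory):

    import os
    from biomaj_download.download.ftp import FTPDownload

    os.environ['UNCOMPRESS_SKIP_CHECK'] = '1'        # any value disables the check
    try:
        ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
        (file_list, dir_list) = ftpd.list()
        ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
        ftpd.download('/tmp')                        # files are kept even if archive_check would fail
        ftpd.close()
    finally:
        del os.environ['UNCOMPRESS_SKIP_CHECK']      # restore default validation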
def test_download_in_subdir(self):
ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/')
@@ -657,4 +674,3 @@ class TestBiomajIRODSDownload(unittest.TestCase):
irodsd.set_offline_dir(self.utils.data_dir)
(files_list, dir_list) = irodsd.list()
self.assertTrue(len(files_list) != 0)