# vi: ts=4 expandtab
#
#    Copyright (C) 2012 Canonical Ltd.
#    Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
#    Copyright (C) 2012 Yahoo! Inc.
#
#    Author: Scott Moser <scott.moser@canonical.com>
#    Author: Juerg Haefliger <juerg.haefliger@hp.com>
#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License version 3, as
#    published by the Free Software Foundation.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

import httplib
import time
import urllib
from urlparse import (urlparse, urlunparse)

import requests
from requests import exceptions

from cloudinit import log as logging
from cloudinit import version

LOG = logging.getLogger(__name__)

NOT_FOUND = httplib.NOT_FOUND

# Check if requests has ssl support (added in requests >= 0.8.8)
SSL_ENABLED = False
CONFIG_ENABLED = False  # This was added in 0.7 (but taken out in >=1.0)
_REQ_VER = None
try:
    from distutils.version import LooseVersion
    import pkg_resources
    _REQ = pkg_resources.get_distribution('requests')
    _REQ_VER = LooseVersion(_REQ.version)  # pylint: disable=E1103
    if _REQ_VER >= LooseVersion('0.8.8'):
        SSL_ENABLED = True
    if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'):
        CONFIG_ENABLED = True
except Exception:
    # Version probing is best-effort: if pkg_resources/distutils are
    # missing or requests is not installed, keep the conservative
    # defaults above.  (Was a bare 'except:', which would also swallow
    # SystemExit/KeyboardInterrupt.)
    pass


def _cleanurl(url):
Scott Moser's avatar
Scott Moser committed
57
    parsed_url = list(urlparse(url, scheme='http'))  # pylint: disable=E1123
58 59 60 61 62 63
    if not parsed_url[1] and parsed_url[2]:
        # Swap these since this seems to be a common
        # occurrence when given urls like 'www.google.com'
        parsed_url[1] = parsed_url[2]
        parsed_url[2] = ''
    return urlunparse(parsed_url)
64 65


def combine_url(base, *add_ons):
    """Join each add-on onto base as a url path segment, inserting a
    single '/' separator and url-quoting the appended segment."""

    def _append(url, segment):
        parts = list(urlparse(url))
        prefix = parts[2]
        if prefix and not prefix.endswith("/"):
            prefix += "/"
        parts[2] = prefix + urllib.quote(str(segment), safe="/:")
        return urlunparse(parts)

    combined = base
    for segment in add_ons:
        combined = _append(combined, segment)
    return combined


# Made to have same accessors as UrlResponse so that the
# read_file_or_url can return this or that object and the
# 'user' of those objects will not need to know the difference.
class StringResponse(object):
    """In-memory stand-in for UrlResponse, wrapping a plain string."""

    def __init__(self, contents, code=200):
        self.code = code
        self.headers = {}
        self.contents = contents
        self.url = None

    def ok(self, *args, **kwargs):  # pylint: disable=W0613
        """Report success; only a stored code of 200 counts as ok."""
        return self.code == 200

    def __str__(self):
        return self.contents


class FileResponse(StringResponse):
    """A StringResponse whose url records the originating file path."""

    def __init__(self, path, contents, code=200):
        super(FileResponse, self).__init__(contents, code=code)
        self.url = path


class UrlResponse(object):
    """Thin read-only adapter over a requests.Response, exposing the
    contents/url/headers/code/ok surface the rest of cloud-init expects."""

    def __init__(self, response):
        self._response = response

    @property
    def contents(self):
        return self._response.content

    @property
    def url(self):
        return self._response.url

    def ok(self, redirects_ok=False):
        """Return True for 2xx codes (and 3xx when redirects_ok)."""
        limit = 400 if redirects_ok else 300
        return 200 <= self.code < limit

    @property
    def headers(self):
        return self._response.headers

    @property
    def code(self):
        return self._response.status_code

    def __str__(self):
        return self.contents


class UrlError(IOError):
    """IOError raised for failed url reads; keeps the original exception
    plus the http status code and response headers when available."""

    def __init__(self, cause, code=None, headers=None):
        IOError.__init__(self, str(cause))
        self.cause = cause
        self.code = code
        # Normalize missing headers to an empty dict so callers can always
        # look up/iterate without a None check.
        self.headers = {} if headers is None else headers


151 152
def _get_ssl_args(url, ssl_details):
    ssl_args = {}
Scott Moser's avatar
Scott Moser committed
153 154
    scheme = urlparse(url).scheme  # pylint: disable=E1101
    if scheme == 'https' and ssl_details:
155
        if not SSL_ENABLED:
156 157
            LOG.warn("SSL is not supported in requests v%s, "
                     "cert. verification can not occur!", _REQ_VER)
158 159
        else:
            if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
160
                ssl_args['verify'] = ssl_details['ca_certs']
161
            else:
162
                ssl_args['verify'] = True
163
            if 'cert_file' in ssl_details and 'key_file' in ssl_details:
164
                ssl_args['cert'] = [ssl_details['cert_file'],
165
                                    ssl_details['key_file']]
166
            elif 'cert_file' in ssl_details:
167 168 169
                ssl_args['cert'] = str(ssl_details['cert_file'])
    return ssl_args

Scott Moser's avatar
Scott Moser committed
170

171
def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
            headers=None, headers_cb=None, ssl_details=None,
            check_status=True, allow_redirects=True, exception_cb=None):
    """Fetch a url via the requests library, retrying manually on failure.

    url: url to fetch (normalized via _cleanurl first).
    data: optional request body; when truthy the method becomes POST.
    timeout: per-attempt timeout in seconds (clamped to >= 0).
    retries: number of *extra* attempts after the first failure.
    sec_between: seconds to sleep between attempts (None disables sleeping).
    headers: static request headers; defaults to a Cloud-Init User-Agent.
    headers_cb: callable(url) -> headers dict, re-invoked before every
                attempt; takes precedence over the static 'headers'.
    ssl_details: dict consumed by _get_ssl_args for https urls.
    check_status: when True, call raise_for_status() so error responses
                  raise instead of being returned.
    allow_redirects: passed through to requests.
    exception_cb: callable(req_args_copy, UrlError) -> bool; a falsy
                  return aborts the retry loop early.

    Returns a UrlResponse on success; after all attempts fail, raises the
    last UrlError collected.
    """
    url = _cleanurl(url)
    req_args = {
        'url': url,
    }
    req_args.update(_get_ssl_args(url, ssl_details))
    req_args['allow_redirects'] = allow_redirects
    req_args['method'] = 'GET'
    if timeout is not None:
        req_args['timeout'] = max(float(timeout), 0)
    if data:
        req_args['method'] = 'POST'
    # It doesn't seem like config
    # was added in older library versions (or newer ones either), thus we
    # need to manually do the retries if it wasn't...
    if CONFIG_ENABLED:
        req_config = {
            'store_cookies': False,
        }
        # Don't use the retry support built-in
        # since it doesn't allow for 'sleep_times'
        # in between tries....
        # if retries:
        #     req_config['max_retries'] = max(int(retries), 0)
        req_args['config'] = req_config
    manual_tries = 1
    if retries:
        manual_tries = max(int(retries) + 1, 1)
    if not headers:
        headers = {
            'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
        }
    if not headers_cb:
        # Wrap the static headers in a callback so the loop below has a
        # single code path.
        def _cb(url):
            return headers
        headers_cb = _cb
    if data:
        req_args['data'] = data
    if sec_between is None:
        sec_between = -1

    excps = []
    # Handle retrying ourselves since the built-in support
    # doesn't handle sleeping between tries...
    for i in range(0, manual_tries):
        # Recomputed every attempt so a headers_cb can refresh
        # credentials/timestamps between tries.
        req_args['headers'] = headers_cb(url)
        # Log everything except 'data', which may be large or sensitive.
        filtered_req_args = {}
        for (k, v) in req_args.items():
            if k == 'data':
                continue
            filtered_req_args[k] = v
        try:
            LOG.debug("[%s/%s] open '%s' with %s configuration", i,
                      manual_tries, url, filtered_req_args)

            r = requests.request(**req_args)
            if check_status:
                r.raise_for_status()  # pylint: disable=E1103
            LOG.debug("Read from %s (%s, %sb) after %s attempts", url,
                      r.status_code, len(r.content),  # pylint: disable=E1103
                      (i + 1))
            # Doesn't seem like we can make it use a different
            # subclass for responses, so add our own backward-compat
            # attrs
            return UrlResponse(r)
        except exceptions.RequestException as e:
            if (isinstance(e, (exceptions.HTTPError))
                    and hasattr(e, 'response')  # This appeared in v 0.10.8
                    and hasattr(e.response, 'status_code')):
                excps.append(UrlError(e, code=e.response.status_code,
                                      headers=e.response.headers))
            else:
                excps.append(UrlError(e))
                if SSL_ENABLED and isinstance(e, exceptions.SSLError):
                    # ssl exceptions are not going to get fixed by waiting a
                    # few seconds
                    break
            if exception_cb and not exception_cb(req_args.copy(), excps[-1]):
                break
            if i + 1 < manual_tries and sec_between > 0:
                LOG.debug("Please wait %s seconds while we wait to try again",
                          sec_between)
                time.sleep(sec_between)
    if excps:
        raise excps[-1]
    return None  # Should throw before this...


def wait_for_url(urls, max_wait=None, timeout=None,
                 status_cb=None, headers_cb=None, sleep_time=1,
                 exception_cb=None):
    """
    urls:      a list of urls to try
    max_wait:  roughly the maximum time to wait before giving up
               The max time is *actually* len(urls)*timeout as each url will
               be tried once and given the timeout provided.
               a number <= 0 will always result in only one try
    timeout:   the timeout provided to urlopen
    status_cb: call method with string message when a url is not available
    headers_cb: call method with single argument of url to get headers
                for request.
    sleep_time: NOTE(review): currently ignored -- the loop below
                overwrites it every pass with an adaptive value
                (int(loop_n / 5) + 1); confirm whether that is intended.
    exception_cb: call method with 2 arguments 'msg' (per status_cb) and
                  'exception', the exception that occurred.

    the idea of this routine is to wait for the EC2 metadata service to
    come up.  On both Eucalyptus and EC2 we have seen the case where
    the instance hit the MD before the MD service was up.  EC2 seems
    to have permanently fixed this, though.

    In openstack, the metadata service might be painfully slow, and
    unable to avoid hitting a timeout of even up to 10 seconds or more
    (LP: #894279) for a simple GET.

    Offset those needs with the need to not hang forever (and block boot)
    on a system where cloud-init is configured to look for EC2 Metadata
    service but is not going to find one.  It is possible that the instance
    data host (169.254.169.254) may be firewalled off entirely for a system,
    meaning that the connection will block forever unless a timeout is set.

    Returns the first url that answers with non-empty contents and an ok
    status, or False once max_wait is exhausted.
    """
    start_time = time.time()

    # Default status reporter: just log the message.
    def log_status_cb(msg, exc=None):
        LOG.debug(msg)

    if status_cb is None:
        status_cb = log_status_cb

    # NOTE(review): 'max_wait <= 0' is evaluated before the None check, so
    # this relies on python 2's 'None <= 0' being True (None also means
    # "deadline already up" for the final check); on python 3 this
    # comparison would raise TypeError.
    def timeup(max_wait, start_time):
        return ((max_wait <= 0 or max_wait is None) or
                (time.time() - start_time > max_wait))

    loop_n = 0
    while True:
        # Back off gradually: 1s for the first five passes, 2s for the
        # next five, etc.  (This clobbers the sleep_time argument.)
        sleep_time = int(loop_n / 5) + 1
        for url in urls:
            now = time.time()
            if loop_n != 0:
                if timeup(max_wait, start_time):
                    break
                if timeout and (now + timeout > (start_time + max_wait)):
                    # shorten timeout to not run way over max_time
                    timeout = int((start_time + max_wait) - now)

            reason = ""
            e = None
            try:
                if headers_cb is not None:
                    headers = headers_cb(url)
                else:
                    headers = {}

                # check_status=False: bad statuses are classified here so
                # they produce a UrlError carrying the response details.
                response = readurl(url, headers=headers, timeout=timeout,
                                   check_status=False)
                if not response.contents:
                    reason = "empty response [%s]" % (response.code)
                    e = UrlError(ValueError(reason),
                                 code=response.code, headers=response.headers)
                elif not response.ok():
                    reason = "bad status code [%s]" % (response.code)
                    e = UrlError(ValueError(reason),
                                 code=response.code, headers=response.headers)
                else:
                    return url
            # NOTE(review): binding the exception via 'except ... as e' and
            # reading 'e' after the block works on python 2 only; python 3
            # deletes the name when the except clause ends, which would
            # break the exception_cb call below.
            except UrlError as e:
                reason = "request error [%s]" % e
            except Exception as e:
                reason = "unexpected error [%s]" % e

            time_taken = int(time.time() - start_time)
            status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
                                                               time_taken,
                                                               max_wait,
                                                               reason)
            status_cb(status_msg)
            if exception_cb:
                # This can be used to alter the headers that will be sent
                # in the future, for example this is what the MAAS datasource
                # does.
                exception_cb(msg=status_msg, exception=e)

        if timeup(max_wait, start_time):
            break

        loop_n = loop_n + 1
        LOG.debug("Please wait %s seconds while we wait to try again",
                  sleep_time)
        time.sleep(sleep_time)

    return False