Commit 648dbbf6 authored by Brent Baude's avatar Brent Baude Committed by Scott Moser

Get Azure endpoint server from DHCP client

It is more efficient and cross-distribution safe to use the hooks function
from dhclient to obtain the Azure endpoint server (DHCP option 245).

This is done by providing shell scripts that are called by the hooks
infrastructure of both dhclient and NetworkManager.  The hooks then
invoke 'cloud-init dhclient-hook' that maintains json data
with the dhclient options in
/run/cloud-init/dhclient.hooks/<interface>.json .

The azure helper then pulls the value from
/run/cloud-init/dhclient.hooks/<interface>.json file(s). If that file does
not exist or the value is not present, it will then fall back to the
original method of scraping the dhcp client lease file.
parent bc2c3267
# vi: ts=4 expandtab
import json
import os
import tempfile
def atomic_write_file(path, content, mode='w'):
    """Write ``content`` to ``path`` by way of a temporary file and a rename.

    The data is written to a temporary file created in the same directory
    as ``path`` and that file is then renamed onto ``path``, so ``path``
    is only ever replaced by a completely written file.

    :param path: destination file path.
    :param content: data to write; its type must match ``mode`` (text for
        ``'w'``, bytes for ``'wb'``).
    :param mode: mode used to open the temporary file.
    :raises Exception: any error raised while writing or renaming is
        re-raised after the temporary file has been removed.
    """
    tf = None
    try:
        # Create the temp file next to the target so the rename does not
        # have to cross a filesystem boundary.
        tf = tempfile.NamedTemporaryFile(dir=os.path.dirname(path),
                                         delete=False, mode=mode)
        tf.write(content)
        tf.close()
        os.rename(tf.name, path)
    except Exception:
        if tf is not None:
            # Best-effort cleanup: never leave a stray temp file behind.
            tf.close()
            if os.path.exists(tf.name):
                os.unlink(tf.name)
        raise
def atomic_write_json(path, data):
    """Serialize ``data`` as JSON and write it to ``path`` atomically.

    The JSON text uses one-space indentation and sorted keys, and is
    terminated by a newline.  The write itself is delegated to
    ``atomic_write_file``, whose return value is passed through.
    """
    payload = json.dumps(data, indent=1, sort_keys=True) + "\n"
    return atomic_write_file(path, payload)
......@@ -25,7 +25,6 @@ import argparse
import json
import os
import sys
import tempfile
import time
import traceback
......@@ -47,6 +46,10 @@ from cloudinit.reporting import events
from cloudinit.settings import (PER_INSTANCE, PER_ALWAYS, PER_ONCE,
from cloudinit.atomic_helper import atomic_write_json
from cloudinit.dhclient_hook import LogDhclient
# Pretty little cheetah formatted welcome message template
WELCOME_MSG_TPL = ("Cloud-init v. ${version} running '${action}' at "
......@@ -452,22 +455,10 @@ def main_single(name, args):
return 0
def atomic_write_file(path, content, mode='w'):
tf = None
tf = tempfile.NamedTemporaryFile(dir=os.path.dirname(path),
delete=False, mode=mode)
os.rename(, path)
except Exception as e:
if tf is not None:
raise e
def atomic_write_json(path, data):
return atomic_write_file(path, json.dumps(data, indent=1) + "\n")
def dhclient_hook(name, args):
record = LogDhclient(args)
def status_wrapper(name, args, data_d=None, link_d=None):
......@@ -627,7 +618,6 @@ def main(sysv_args=None):
# This subcommand allows you to run a single module
parser_single = subparsers.add_parser('single',
help=('run a single module '))
parser_single.set_defaults(action=('single', main_single))
parser_single.add_argument("--name", '-n', action="store",
help="module name to run",
......@@ -644,6 +634,16 @@ def main(sysv_args=None):
' pass to this module'))
parser_single.set_defaults(action=('single', main_single))
parser_dhclient = subparsers.add_parser('dhclient-hook',
help=('run the dhclient hook'
'to record network info'))
help=('action taken on the interface'))
help=('the network interface being acted'
' upon'))
parser_dhclient.set_defaults(action=('dhclient_hook', dhclient_hook))
args = parser.parse_args(args=sysv_args)
......@@ -677,9 +677,18 @@ def main(sysv_args=None):
"running single module %s" %
report_on =
elif name == 'dhclient_hook':
rname, rdesc = ("dhclient-hook",
"running dhclient-hook module")
args.reporter = events.ReportEventStack(
rname, rdesc, reporting_enabled=report_on)
with args.reporter:
return util.log_time(
logfunc=LOG.debug, msg="cloud-init mode '%s'" % name,
get_uptime=True, func=functor, args=(name, args))
if __name__ == '__main__':
# vi: ts=4 expandtab
import os
from cloudinit.atomic_helper import atomic_write_json
from cloudinit import log as logging
from cloudinit import stages
LOG = logging.getLogger(__name__)
class LogDhclient(object):
    """Record the DHCP options of one network interface as a JSON file.

    The file is ``<run path>/dhclient.hooks/<interface>.json``; its content
    is taken from the environment variables of the calling hook.
    """

    def __init__(self, cli_args):
        """Remember the interface/action and derive the JSON file path.

        :param cli_args: object providing ``net_interface`` and
            ``net_action`` attributes (the parsed command line).
        """
        self.hooks_dir = self._get_hooks_dir()
        self.net_interface = cli_args.net_interface
        self.net_action = cli_args.net_action
        self.hook_file = os.path.join(self.hooks_dir,
                                      self.net_interface + ".json")

    @staticmethod
    def _get_hooks_dir():
        """Return the 'dhclient.hooks' directory below cloud-init's run path."""
        i = stages.Init()
        return os.path.join(i.paths.get_runpath(), 'dhclient.hooks')

    def check_hooks_dir(self):
        """Create the hooks directory, or drop a stale record on 'down'."""
        if not os.path.exists(self.hooks_dir):
            os.makedirs(self.hooks_dir)
        # If the action is down and the json file exists, we need to
        # delete the file.  Compare by value: identity ('is') against a
        # string literal is not guaranteed to match an equal string.
        elif self.net_action == 'down' and os.path.exists(self.hook_file):
            os.remove(self.hook_file)

    @staticmethod
    def get_vals(info):
        """Extract the DHCP related entries from a mapping.

        Only keys starting with ``DHCP4_`` or ``new_`` are kept; both
        markers are removed from the key and the result is lower-cased.

        :param info: mapping of names to values (``os.environ`` in
            ``record``).
        :return: new dict with the normalized keys.
        """
        new_info = {}
        for k, v in info.items():
            if k.startswith(("DHCP4_", "new_")):
                key = (k.replace('DHCP4_', '').replace('new_', '')).lower()
                new_info[key] = v
        return new_info

    def record(self):
        """Write the DHCP values found in the environment to the JSON file."""
        envs = os.environ
        if self.hook_file is None:
            return
        atomic_write_json(self.hook_file, self.get_vals(envs))
        LOG.debug("Wrote dhclient options in %s", self.hook_file)
......@@ -20,18 +20,17 @@ import base64
import contextlib
import crypt
import fnmatch
from functools import partial
import os
import os.path
import time
import xml.etree.ElementTree as ET
from xml.dom import minidom
from import get_metadata_from_fabric
import xml.etree.ElementTree as ET
from cloudinit import log as logging
from cloudinit.settings import PER_ALWAYS
from cloudinit import sources
from import get_metadata_from_fabric
from cloudinit import util
LOG = logging.getLogger(__name__)
......@@ -107,6 +106,8 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'):
class DataSourceAzureNet(sources.DataSource):
FALLBACK_LEASE = '/var/lib/dhcp/dhclient.eth0.leases'
def __init__(self, sys_cfg, distro, paths):
sources.DataSource.__init__(self, sys_cfg, distro, paths)
self.seed_dir = os.path.join(paths.seed_dir, 'azure')
......@@ -115,6 +116,8 @@ class DataSourceAzureNet(sources.DataSource):
self.ds_cfg = util.mergemanydict([
util.get_cfg_by_path(sys_cfg, DS_CFG_PATH, {}),
self.dhclient_lease_file = self.paths.cfgs.get('dhclient_lease',
def __str__(self):
root = sources.DataSource.__str__(self)
......@@ -226,7 +229,9 @@ class DataSourceAzureNet(sources.DataSource):
write_files(ddir, files, dirmode=0o700)
if self.ds_cfg['agent_command'] == '__builtin__':
metadata_func = get_metadata_from_fabric
metadata_func = partial(get_metadata_from_fabric,
metadata_func = self.get_metadata_from_agent
import json
import logging
import os
import re
......@@ -6,6 +7,7 @@ import struct
import tempfile
import time
from cloudinit import stages
from contextlib import contextmanager
from xml.etree import ElementTree
......@@ -187,19 +189,32 @@ class WALinuxAgentShim(object):
' </Container>',
def __init__(self):
def __init__(self, fallback_lease_file=None):
LOG.debug('WALinuxAgentShim instantiated...')
self.endpoint = self.find_endpoint()
self.dhcpoptions = None
self._endpoint = None
self.openssl_manager = None
self.values = {}
self.lease_file = fallback_lease_file
def clean_up(self):
if self.openssl_manager is not None:
def get_ip_from_lease_value(lease_value):
unescaped_value = lease_value.replace('\\', '')
def _get_hooks_dir():
    """Return the path of the 'dhclient.hooks' directory.

    The directory sits below the run path reported by a fresh
    ``stages.Init()`` object.
    """
    run_path = stages.Init().paths.get_runpath()
    return os.path.join(run_path, "dhclient.hooks")
def endpoint(self):
if self._endpoint is None:
self._endpoint = self.find_endpoint(self.lease_file)
return self._endpoint
def get_ip_from_lease_value(fallback_lease_value):
unescaped_value = fallback_lease_value.replace('\\', '')
if len(unescaped_value) > 4:
hex_string = ''
for hex_pair in unescaped_value.split(':'):
......@@ -213,15 +228,75 @@ class WALinuxAgentShim(object):
return socket.inet_ntoa(packed_bytes)
def find_endpoint():
LOG.debug('Finding Azure endpoint...')
content = util.load_file('/var/lib/dhcp/dhclient.eth0.leases')
value = None
def _get_value_from_leases_file(fallback_lease_file):
    """Return the last 'unknown-245' option value found in a lease file.

    :param fallback_lease_file: path of the dhclient lease file to read.
    :return: the raw option value of the last matching line (for example
        ``a8:3f:81:10``), or None when no line mentions 'unknown-245'.
    """
    leases = []
    content = util.load_file(fallback_lease_file)
    LOG.debug("content is {}".format(content))
    for line in content.splitlines():
        if 'unknown-245' in line:
            # Example line from Ubuntu
            # option unknown-245 a8:3f:81:10;
            leases.append(line.strip(' ').split(' ', 2)[-1].strip(';\n"'))
    # Return the "most recent" one in the list
    if not leases:
        return None
    return leases[-1]
def _load_dhclient_json():
    """Load every file of the dhclient hooks directory as JSON.

    :return: dict mapping each file's base name (with '.json' removed) to
        its decoded content, or None when the hooks directory is missing.
    :raises ValueError: if a file does not contain valid JSON.
    """
    dhcp_options = {}
    hooks_dir = WALinuxAgentShim._get_hooks_dir()
    if not os.path.exists(hooks_dir):
        LOG.debug("%s not found.", hooks_dir)
        return None
    hook_files = [os.path.join(hooks_dir, x)
                  for x in os.listdir(hooks_dir)]
    for hook_file in hook_files:
        try:
            name = os.path.basename(hook_file).replace('.json', '')
            dhcp_options[name] = json.loads(util.load_file((hook_file)))
        except ValueError:
            # Interpolate the file name; passing it as a second argument
            # to ValueError would leave the '%s' unformatted.
            raise ValueError("%s is not valid JSON data" % hook_file)
    return dhcp_options
def _get_value_from_dhcpoptions(dhcp_options):
if dhcp_options is None:
return None
# the MS endpoint server is given to us as DHPC option 245
_value = None
for interface in dhcp_options:
_value = dhcp_options[interface].get('unknown_245', None)
if _value is not None:
LOG.debug("Endpoint server found in dhclient options")
return _value
def find_endpoint(fallback_lease_file=None):
    """Determine the IP address of the Azure endpoint server.

    The dhclient hook JSON data is consulted first; when it yields no
    value, the given lease file (if any) is searched instead.

    :param fallback_lease_file: path of a dhclient lease file used when
        the hook data does not contain the endpoint; may be None.
    :return: the endpoint address as produced by
        ``get_ip_from_lease_value``.
    :raises ValueError: if neither source provides a value.
    """
    LOG.debug('Finding Azure endpoint...')
    # Option-245 stored in /run/cloud-init/dhclient.hooks/<ifc>.json
    # a dhclient exit hook that calls cloud-init-dhclient-hook
    dhcp_options = WALinuxAgentShim._load_dhclient_json()
    value = WALinuxAgentShim._get_value_from_dhcpoptions(dhcp_options)
    if value is None:
        # Fallback and check the leases file if unsuccessful
        LOG.debug("Unable to find endpoint in dhclient logs. "
                  " Falling back to check lease files")
        if fallback_lease_file is None:
            LOG.warning("No fallback lease file was specified.")
        else:
            LOG.debug("Looking for endpoint in lease file %s",
                      fallback_lease_file)
            value = WALinuxAgentShim._get_value_from_leases_file(
                fallback_lease_file)

    if value is None:
        raise ValueError('No endpoint found.')

    endpoint_ip_address = WALinuxAgentShim.get_ip_from_lease_value(value)
    LOG.debug('Azure endpoint found at %s', endpoint_ip_address)
    return endpoint_ip_address
......@@ -271,8 +346,8 @@ class WALinuxAgentShim(object):'Reported ready to Azure fabric.')
def get_metadata_from_fabric():
shim = WALinuxAgentShim()
def get_metadata_from_fabric(fallback_lease_file=None):
    """Register with the Azure fabric and return the data it provides.

    :param fallback_lease_file: lease file path handed to
        ``WALinuxAgentShim`` for its endpoint lookup; may be None.
    :return: whatever ``register_with_azure_and_fetch_data`` returns.
    """
    shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file)
    try:
        return shim.register_with_azure_and_fetch_data()
    finally:
        # Always release what the shim holds, even if registration fails.
        shim.clean_up()
......@@ -98,6 +98,7 @@ system_info:
cloud_dir: /var/lib/cloud/
templates_dir: /etc/cloud/templates/
upstart_dir: /etc/init/
- arches: [i386, amd64]
......@@ -114,3 +115,8 @@ system_info:
ssh_svcname: ssh
set_hostname: False
agent_command: __builtin__
......@@ -9,10 +9,34 @@ Azure Platform
The azure cloud-platform provides initial data to an instance via an attached
CD formatted in UDF. That CD contains an 'ovf-env.xml' file that provides some
information. Additional information is obtained via interaction with the
"endpoint". The ip address of the endpoint is advertised to the instance
inside of dhcp option 245. On ubuntu, that can be seen in
/var/lib/dhcp/dhclient.eth0.leases as a colon delimited hex value (example:
``option unknown-245 64:41:60:82;`` is 100.65.96.130).
To find the endpoint, we now leverage the dhcp client's ability to log its
known values on exit. The endpoint server is advertised in DHCP option 245.
Depending on your networking stack, this can be done
by calling a script in /etc/dhcp/dhclient-exit-hooks or a file in
/etc/NetworkManager/dispatcher.d. Both of these call a sub-command
'dhclient-hook' of cloud-init itself. This sub-command will write the client
information in json format to /run/cloud-init/dhclient.hooks/<interface>.json.
In order for cloud-init to leverage this method to find the endpoint, the
cloud.cfg file must contain:
set_hostname: False
agent_command: __builtin__
If those files are not available, the fallback is to check the leases file
for the endpoint server (again option 245).
You can define the path to the lease file with the 'dhclient_lease' configuration
value under system_info: and paths:. For example:
dhclient_lease: /var/lib/dhcp/dhclient.eth0.leases
If no configuration value is provided, the dhclient_lease value will fall back to
/var/lib/dhcp/dhclient.eth0.leases.
......@@ -176,6 +176,8 @@ else:
(ETC + '/cloud', glob('config/*.cfg')),
(ETC + '/cloud/cloud.cfg.d', glob('config/cloud.cfg.d/*')),
(ETC + '/cloud/templates', glob('templates/*')),
(ETC + '/NetworkManager/dispatcher.d/', ['tools/hook-network-manager']),
(ETC + '/dhcp/dhclient-exit-hooks.d/', ['tools/hook-dhclient']),
(USR_LIB_EXEC + '/cloud-init', ['tools/uncloud-init',
(USR + '/share/doc/cloud-init', [f for f in glob('doc/*') if is_f(f)]),
......@@ -54,13 +54,17 @@ class TestFindEndpoint(TestCase):
self.load_file = patches.enter_context(
mock.patch.object(azure_helper.util, 'load_file'))
self.dhcp_options = patches.enter_context(
def test_missing_file(self):
self.load_file.side_effect = IOError
def test_missing_special_azure_line(self):
self.load_file.return_value = ''
self.dhcp_options.return_value = {'eth0': {'key': 'value'}}
......@@ -72,13 +76,18 @@ class TestFindEndpoint(TestCase):
' option unknown-245 {0};'.format(encoded_address),
def test_from_dhcp_client(self):
self.dhcp_options.return_value = {"eth0": {"unknown_245": "5:4:3:2"}}
def test_latest_lease_used(self):
encoded_addresses = ['5:4:3:2', '4:3:2:1']
file_content = '\n'.join([self._build_lease_content(encoded_address)
for encoded_address in encoded_addresses])
self.load_file.return_value = file_content
self.assertEqual(encoded_addresses[-1].replace(':', '.'),
class TestExtractIpAddressFromLeaseValue(TestCase):
# This script writes DHCP lease information into the cloud-init run directory
# It is sourced, not executed. For more information see dhclient-script(8).
case "$reason" in
BOUND) cloud-init dhclient-hook up "$interface";;
cloud-init dhclient-hook down "$interface";;
# This script hooks into NetworkManager(8) via its scripts
# arguments are 'interface-name' and 'action'
case "$1:$2" in
*:up) exec cloud-init dhclient-hook up "$1";;
*:down) exec cloud-init dhclient-hook down "$1";;
# Current versions of RHEL and CentOS do not honor the directory
# /etc/dhcp/dhclient-exit-hooks.d so this file can be placed in
# /etc/dhcp/dhclient.d instead
cloud-init dhclient-hook up "$interface"
cloud-init dhclient-hook down "$interface"
