azure: support bouncing interfaces to publish new hostname

See the added doc/sources/azure/README.rst for why this is necessary.
Essentially, we are now doing the following in the get_data() method
of the azure datasource to publish NewHostName:
 hostname NewHostName
 ifdown eth0; 
 ifup eth0

LP: #1202758
......@@ -31,9 +31,20 @@ LOG = logging.getLogger(__name__)
DS_NAME = 'Azure'
DEFAULT_METADATA = {"instance-id": "iid-AZURE-NODE"}
AGENT_START = ['service', 'walinuxagent', 'start']
BUILTIN_DS_CONFIG = {'datasource': {DS_NAME: {
'agent_command': AGENT_START,
'data_dir': "/var/lib/waagent"}}}
BOUNCE_COMMAND = ("i=$interface; x=0; ifdown $i || x=$?; "
"ifup $i || x=$?; exit $x")
'agent_command': AGENT_START,
'data_dir': "/var/lib/waagent",
'set_hostname': True,
'hostname_bounce': {
'interface': 'eth0',
'policy': True,
'command': BOUNCE_COMMAND,
'hostname_command': 'hostname',
DS_CFG_PATH = ['datasource', DS_NAME]
class DataSourceAzureNet(sources.DataSource):
......@@ -42,19 +53,19 @@ class DataSourceAzureNet(sources.DataSource):
self.seed_dir = os.path.join(paths.seed_dir, 'azure')
self.cfg = {}
self.seed = None
self.ds_cfg = util.mergemanydict([
util.get_cfg_by_path(sys_cfg, DS_CFG_PATH),
def __str__(self):
root = sources.DataSource.__str__(self)
return "%s [seed=%s]" % (root, self.seed)
def get_data(self):
ddir_cfgpath = ['datasource', DS_NAME, 'data_dir']
# azure removes/ejects the cdrom containing the ovf-env.xml
# file on reboot. So, in order to successfully reboot we
# need to look in the datadir and consider that valid
ddir = util.get_cfg_by_path(self.sys_cfg, ddir_cfgpath)
if ddir is None:
ddir = util.get_cfg_by_path(BUILTIN_DS_CONFIG, ddir_cfgpath)
ddir = self.ds_cfg['data_dir']
candidates = [self.seed_dir]
......@@ -91,36 +102,40 @@ class DataSourceAzureNet(sources.DataSource):
return False
if found == ddir:
LOG.debug("using cached datasource in %s", ddir)
fields = [('cmd', ['datasource', DS_NAME, 'agent_command']),
('datadir', ddir_cfgpath)]
mycfg = {}
for cfg in (self.cfg, self.sys_cfg, BUILTIN_DS_CONFIG):
for name, path in fields:
if name in mycfg:
value = util.get_cfg_by_path(cfg, keyp=path)
if value is not None:
mycfg[name] = value
LOG.debug("using files cached in %s", ddir)
# now update ds_cfg to reflect contents pass in config
usercfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {})
self.ds_cfg = util.mergemanydict([usercfg, self.ds_cfg])
mycfg = self.ds_cfg
# walinux agent writes files world readable, but expects
# the directory to be protected.
write_files(mycfg['datadir'], files, dirmode=0700)
write_files(mycfg['data_dir'], files, dirmode=0700)
# handle the hostname 'publishing'
except Exception as e:
LOG.warn("Failed publishing hostname: %s" % e)
util.logexc(LOG, "handling set_hostname failed")
except util.ProcessExecutionError:
# claim the datasource even if the command failed
util.logexc(LOG, "agent command '%s' failed.", mycfg['cmd'])
util.logexc(LOG, "agent command '%s' failed.",
shcfgxml = os.path.join(mycfg['datadir'], "SharedConfig.xml")
shcfgxml = os.path.join(mycfg['data_dir'], "SharedConfig.xml")
wait_for = [shcfgxml]
fp_files = []
for pk in self.cfg.get('_pubkeys', []):
bname = pk['fingerprint'] + ".crt"
fp_files += [os.path.join(mycfg['datadir'], bname)]
fp_files += [os.path.join(mycfg['data_dir'], bname)]
start = time.time()
missing = wait_for_files(wait_for + fp_files)
......@@ -148,6 +163,43 @@ class DataSourceAzureNet(sources.DataSource):
return self.cfg
def handle_set_hostname(enabled, hostname, cfg):
if not util.is_true(enabled):
if not hostname:
LOG.warn("set_hostname was true but no local-hostname")
apply_hostname_bounce(hostname=hostname, policy=cfg['policy'],
def apply_hostname_bounce(hostname, policy, interface, command,
# set the hostname to 'hostname' if it is not already set to that.
# then, if policy is not off, bounce the interface using command
prev_hostname = util.subp(hostname_command, capture=True)[0].strip()
util.subp([hostname_command, hostname])
if util.is_false(policy):
if prev_hostname == hostname and policy != "force":
env = os.environ.copy()
env['interface'] = interface
if command == "builtin":
util.subp(command, shell=(not isinstance(command, list)), capture=True)
def crtfile_to_pubkey(fname):
pipeline = ('openssl x509 -noout -pubkey < "$0" |'
'ssh-keygen -i -m PKCS8 -f /dev/stdin')
......@@ -319,15 +371,21 @@ def read_azure_ovf(contents):
name = child.localName.lower()
simple = False
value = ""
if (len(child.childNodes) == 1 and
child.childNodes[0].nodeType == dom.TEXT_NODE):
simple = True
value = child.childNodes[0].wholeText
attrs = {k: v for k, v in child.attributes.items()}
# we accept either UserData or CustomData. If both are present
# then behavior is undefined.
if (name == "userdata" or name == "customdata"):
ud = base64.b64decode(''.join(value.split()))
if attrs.get('encoding') in (None, "base64"):
ud = base64.b64decode(''.join(value.split()))
ud = value
elif name == "username":
username = value
elif name == "userpassword":
......@@ -335,7 +393,11 @@ def read_azure_ovf(contents):
elif name == "hostname":
md['local-hostname'] = value
elif name == "dscfg":
cfg['datasource'] = {DS_NAME: util.load_yaml(value, default={})}
if attrs.get('encoding') in (None, "base64"):
dscfg = base64.b64decode(''.join(value.split()))
dscfg = value
cfg['datasource'] = {DS_NAME: util.load_yaml(dscfg, default={})}
elif name == "ssh":
cfg['_pubkeys'] = load_azure_ovf_pubkeys(child)
elif name == "disablesshpasswordauthentication":
......@@ -45,6 +45,11 @@ datasource:
agent_command: [service, walinuxagent, start]
set_hostname: True
interface: eth0
policy: on # [can be 'on', 'off' or 'force']
# Smart OS datasource works over a serial console interacting with
Azure Datasource
This datasource finds metadata and user-data from the Azure cloud platform.
Azure Platform
The azure cloud-platform provides initial data to an instance via an attached
CD formatted in UDF. That CD contains an 'ovf-env.xml' file that provides some
information. Additional information is obtained via interaction with the
"endpoint". The ip address of the endpoint is advertised to the instance
inside of dhcp option 245. On ubuntu, that can be seen in
/var/lib/dhcp/dhclient.eth0.leases as a colon delimited hex value (example:
``option unknown-245 64:41:60:82;`` is 100.65.96.130).
In order to operate correctly, cloud-init needs walinuxagent to provide much
of the interaction with azure. In addition to "provisioning" code, walinuxagent
is a long running daemon that handles the following things:
- generate a x509 certificate and send that to the endpoint
waagent.conf config
In order to use waagent.conf with cloud-init, the following settings are recommended. Other values can be changed or set to the defaults.
# disabling provisioning turns off all 'Provisioning.*' function
# this is currently not handled by cloud-init, so let walinuxagent do it.
Userdata is provided to cloud-init inside the ovf-env.xml file. Cloud-init
expects that user-data will be provided as base64 encoded value inside the
text child of an element named ``UserData`` or ``CustomData`` which is a direct
child of the ``LinuxProvisioningConfigurationSet`` (a sibling to ``UserName``).
If both ``UserData`` and ``CustomData`` are provided, behavior is undefined on
which will be selected.
In the example below, user-data provided is 'this is my userdata', and the
datasource config provided is ``{"agent_command": ["start", "walinuxagent"]}``.
That agent command will take effect as if it were specified in system config.
.. code::
Configuration for the datasource can be read from the system config or set
via the `dscfg` entry in the `LinuxProvisioningConfigurationSet`. Content in
the dscfg node is expected to be base64 encoded yaml content, and it will be
merged into the 'datasource: Azure' entry.
The '``hostname_bounce: command``' entry can be either the literal string
'builtin' or a command to execute. The command will be invoked after the
hostname is set, and will have the 'interface' in its environment. If
``set_hostname`` is not true, then ``hostname_bounce`` will be ignored.
An example might be:
command: ["sh", "-c", "killall dhclient; dhclient $interface"]
.. code::
agent_command: [service, walinuxagent, start]
set_hostname: True
# the name of the interface to bounce
interface: eth0
# policy can be 'on', 'off' or 'force'
policy: on
# the method 'bounce' command.
command: "builtin"
hostname_command: "hostname"
When the user launches an instance, they provide a hostname for that instance.
The hostname is provided to the instance in the ovf-env.xml file as
``HostName``.
Whatever value the instance provides in its dhcp request will resolve in the
domain returned in the 'search' request.
The interesting issue is that a generic image will already have a hostname
configured. The ubuntu cloud images have 'ubuntu' as the hostname of the
system, and the initial dhcp request on eth0 is not guaranteed to occur after
the datasource code has been run. So, on first boot, that initial value will
be sent in the dhcp request and *that* value will resolve.
In order to make the ``HostName`` provided in the ovf-env.xml resolve, a
dhcp request must be made with the new value. Walinuxagent (in its current
version) handles this by polling the state of hostname and bouncing ('``ifdown
eth0; ifup eth0``') the network interface if it sees that a change has been
made.
cloud-init handles this by setting the hostname in the DataSource's 'get_data'
method via '``hostname $HostName``', and then bouncing the interface. This
behavior can be configured or disabled in the datasource config. See
'Configuration' above.
......@@ -26,8 +26,15 @@ def construct_valid_ovf_env(data=None, pubkeys=None, userdata=None):
for key, val in data.items():
content += "<%s>%s</%s>\n" % (key, val, key)
for key, dval in data.items():
if isinstance(dval, dict):
val = dval.get('text')
attrs = ' ' + ' '.join(["%s='%s'" % (k, v) for k, v in dval.items()
if k != 'text'])
val = dval
attrs = ""
content += "<%s%s>%s</%s>\n" % (key, attrs, val, key)
if userdata:
content += "<UserData>%s</UserData>\n" % (base64.b64encode(userdata))
......@@ -103,6 +110,9 @@ class TestAzureDataSource(MockerTestCase):
data['iid_from_shared_cfg'] = path
return 'i-my-azure-id'
def _apply_hostname_bounce(**kwargs):
data['apply_hostname_bounce'] = kwargs
if data.get('ovfcontent') is not None:
populate_dir(os.path.join(self.paths.seed_dir, "azure"),
{'ovf-env.xml': data['ovfcontent']})
......@@ -118,7 +128,9 @@ class TestAzureDataSource(MockerTestCase):
(mod, 'pubkeys_from_crt_files',
(mod, 'iid_from_shared_config',
_iid_from_shared_config), ])
(mod, 'apply_hostname_bounce',
_apply_hostname_bounce), ])
dsrc = mod.DataSourceAzureNet(
data.get('sys_cfg', {}), distro=None, paths=self.paths)
......@@ -139,11 +151,26 @@ class TestAzureDataSource(MockerTestCase):
self.assertEqual(0700, data['datadir_mode'])
self.assertEqual(dsrc.metadata['instance-id'], 'i-my-azure-id')
def test_user_cfg_set_agent_command_plain(self):
# set dscfg in via plaintext
cfg = {'agent_command': "my_command"}
odata = {'HostName': "myhost", 'UserName': "myuser",
'dscfg': {'text': yaml.dump(cfg), 'encoding': 'plain'}}
data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
dsrc = self._get_ds(data)
ret = dsrc.get_data()
self.assertEqual(data['agent_invoked'], cfg['agent_command'])
def test_user_cfg_set_agent_command(self):
# set dscfg in via base64 encoded yaml
cfg = {'agent_command': "my_command"}
odata = {'HostName': "myhost", 'UserName': "myuser",
'dscfg': yaml.dump(cfg)}
'dscfg': {'text': base64.b64encode(yaml.dump(cfg)),
'encoding': 'base64'}}
data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
dsrc = self._get_ds(data)
ret = dsrc.get_data()
......@@ -218,6 +245,48 @@ class TestAzureDataSource(MockerTestCase):
for mypk in mypklist:
self.assertIn(mypk, dsrc.cfg['_pubkeys'])
def test_disabled_bounce(self):
def test_apply_bounce_call_1(self):
# hostname needs to get through to apply_hostname_bounce
mydata = "FOOBAR"
odata = {'HostName': 'my-random-hostname'}
data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
self.assertIn('hostname', data['apply_hostname_bounce'])
def test_apply_bounce_call_configurable(self):
# hostname_bounce should be configurable in datasource cfg
cfg = {'hostname_bounce': {'interface': 'eth1', 'policy': 'off',
'command': 'my-bounce-command',
'hostname_command': 'my-hostname-command'}}
odata = {'HostName': "xhost",
'dscfg': {'text': base64.b64encode(yaml.dump(cfg)),
'encoding': 'base64'}}
data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
for k in cfg['hostname_bounce']:
self.assertIn(k, data['apply_hostname_bounce'])
for k, v in cfg['hostname_bounce'].items():
self.assertEqual(data['apply_hostname_bounce'][k], v)
def test_set_hostname_disabled(self):
# config specifying set_hostname off should not bounce
cfg = {'set_hostname': False}
odata = {'HostName': "xhost",
'dscfg': {'text': base64.b64encode(yaml.dump(cfg)),
'encoding': 'base64'}}
data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
self.assertEqual(data.get('apply_hostname_bounce', "N/A"), "N/A")
class TestReadAzureOvf(MockerTestCase):
def test_invalid_xml_raises_non_azure_ds(self):
