blob: ff3b3eeb0b36c3c3e4dcd08b0cd068be38dd2b71 [file] [log] [blame]
From 690c05ca495f1d55a469724c94e1551cbfa836f2 Mon Sep 17 00:00:00 2001
From: Rajesh Tailor <rajesh.tailor@nttdata.com>
Date: Wed, 4 Mar 2015 05:05:19 -0800
Subject: [PATCH] Delete orphaned instance files from compute nodes
While resizing/revert-resizing instance, if instance gets deleted
in between, then instance files remains either on the source or
destination compute node.
To address this issue, added a new periodic task
'_cleanup_incomplete_migrations' which takes care of deleting
instance files from source/destination compute nodes and then
mark migration record as failed so that it doesn't appear again
in the next periodic task run.
SecurityImpact
Closes-Bug: 1392527
Change-Id: I9866d8e32e99b9f907921f4b226edf7b62bd83a7
(cherry picked from commit 4655751cdd97a4b527a25c7c0a96044ba212cd19)
---
nova/compute/manager.py | 61 ++++++++++++++++++++++--
nova/tests/unit/compute/test_compute_mgr.py | 72 +++++++++++++++++++++++++++++
2 files changed, 129 insertions(+), 4 deletions(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index bf5585e..24a5811 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -267,15 +267,21 @@ def errors_out_migration(function):
def decorated_function(self, context, *args, **kwargs):
try:
return function(self, context, *args, **kwargs)
- except Exception:
+ except Exception as ex:
with excutils.save_and_reraise_exception():
wrapped_func = utils.get_wrapped_function(function)
keyed_args = safe_utils.getcallargs(wrapped_func, context,
*args, **kwargs)
migration = keyed_args['migration']
- status = migration.status
- if status not in ['migrating', 'post-migrating']:
- return
+
+ # NOTE(rajesht): If InstanceNotFound error is thrown from
+ # decorated function, migration status should be set to
+ # 'error', without checking current migration status.
+ if not isinstance(ex, exception.InstanceNotFound):
+ status = migration.status
+ if status not in ['migrating', 'post-migrating']:
+ return
+
migration.status = 'error'
try:
with migration.obj_as_admin():
@@ -3727,6 +3733,7 @@ class ComputeManager(manager.Manager):
@wrap_exception()
@reverts_task_state
@wrap_instance_event
+ @errors_out_migration
@wrap_instance_fault
def revert_resize(self, context, instance, migration, reservations):
"""Destroys the new instance on the destination machine.
@@ -3783,6 +3790,7 @@ class ComputeManager(manager.Manager):
@wrap_exception()
@reverts_task_state
@wrap_instance_event
+ @errors_out_migration
@wrap_instance_fault
def finish_revert_resize(self, context, instance, reservations, migration):
"""Finishes the second half of reverting a resize.
@@ -6578,6 +6586,51 @@ class ComputeManager(manager.Manager):
with utils.temporary_mutation(context, read_deleted='yes'):
instance.save()
+ @periodic_task.periodic_task(spacing=CONF.instance_delete_interval)
+ def _cleanup_incomplete_migrations(self, context):
+ """Delete instance files on failed resize/revert-resize operation
+
+ During resize/revert-resize operation, if that instance gets deleted
+ in-between then instance files might remain either on source or
+ destination compute node because of race condition.
+ """
+ LOG.debug('Cleaning up deleted instances with incomplete migration ')
+ migration_filters = {'host': CONF.host,
+ 'status': 'error'}
+ migrations = objects.MigrationList.get_by_filters(context,
+ migration_filters)
+
+ if not migrations:
+ return
+
+ inst_uuid_from_migrations = set([migration.instance_uuid for migration
+ in migrations])
+
+ inst_filters = {'deleted': True, 'soft_deleted': False,
+ 'uuid': inst_uuid_from_migrations}
+ attrs = ['info_cache', 'security_groups', 'system_metadata']
+ with utils.temporary_mutation(context, read_deleted='yes'):
+ instances = objects.InstanceList.get_by_filters(
+ context, inst_filters, expected_attrs=attrs, use_slave=True)
+
+ for instance in instances:
+ if instance.host != CONF.host:
+ for migration in migrations:
+ if instance.uuid == migration.instance_uuid:
+ # Delete instance files if not cleanup properly either
+ # from the source or destination compute nodes when
+ # the instance is deleted during resizing.
+ self.driver.delete_instance_files(instance)
+ try:
+ migration.status = 'failed'
+ with migration.obj_as_admin():
+ migration.save()
+ except exception.MigrationNotFound:
+ LOG.warning(_LW("Migration %s is not found."),
+ migration.id, context=context,
+ instance=instance)
+ break
+
@messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
exception.NovaException,
NotImplementedError)
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 4b7234e..ee1ab47 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -1374,6 +1374,78 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.assertFalse(c.cleaned)
self.assertEqual('1', c.system_metadata['clean_attempts'])
+ @mock.patch.object(objects.Migration, 'obj_as_admin')
+ @mock.patch.object(objects.Migration, 'save')
+ @mock.patch.object(objects.MigrationList, 'get_by_filters')
+ @mock.patch.object(objects.InstanceList, 'get_by_filters')
+ def _test_cleanup_incomplete_migrations(self, inst_host,
+ mock_inst_get_by_filters,
+ mock_migration_get_by_filters,
+ mock_save, mock_obj_as_admin):
+ def fake_inst(context, uuid, host):
+ inst = objects.Instance(context)
+ inst.uuid = uuid
+ inst.host = host
+ return inst
+
+ def fake_migration(uuid, status, inst_uuid, src_host, dest_host):
+ migration = objects.Migration()
+ migration.uuid = uuid
+ migration.status = status
+ migration.instance_uuid = inst_uuid
+ migration.source_compute = src_host
+ migration.dest_compute = dest_host
+ return migration
+
+ fake_instances = [fake_inst(self.context, '111', inst_host),
+ fake_inst(self.context, '222', inst_host)]
+
+ fake_migrations = [fake_migration('123', 'error', '111',
+ 'fake-host', 'fake-mini'),
+ fake_migration('456', 'error', '222',
+ 'fake-host', 'fake-mini')]
+
+ mock_migration_get_by_filters.return_value = fake_migrations
+ mock_inst_get_by_filters.return_value = fake_instances
+
+ with mock.patch.object(self.compute.driver, 'delete_instance_files'):
+ self.compute._cleanup_incomplete_migrations(self.context)
+
+ # Ensure that migration status is set to 'failed' after instance
+ # files deletion for those instances whose instance.host is not
+ # same as compute host where periodic task is running.
+ for inst in fake_instances:
+ if inst.host != CONF.host:
+ for mig in fake_migrations:
+ if inst.uuid == mig.instance_uuid:
+ self.assertEqual('failed', mig.status)
+
+ def test_cleanup_incomplete_migrations_dest_node(self):
+ """Test to ensure instance files are deleted from destination node.
+
+ If instance gets deleted during resizing/revert-resizing operation,
+ in that case instance files gets deleted from instance.host (source
+ host here), but there is possibility that instance files could be
+ present on destination node.
+ This test ensures that `_cleanup_incomplete_migration` periodic
+ task deletes orphaned instance files from destination compute node.
+ """
+ self.flags(host='fake-mini')
+ self._test_cleanup_incomplete_migrations('fake-host')
+
+ def test_cleanup_incomplete_migrations_source_node(self):
+ """Test to ensure instance files are deleted from source node.
+
+ If instance gets deleted during resizing/revert-resizing operation,
+ in that case instance files gets deleted from instance.host (dest
+ host here), but there is possibility that instance files could be
+ present on source node.
+ This test ensures that `_cleanup_incomplete_migration` periodic
+ task deletes orphaned instance files from source compute node.
+ """
+ self.flags(host='fake-host')
+ self._test_cleanup_incomplete_migrations('fake-mini')
+
def test_attach_interface_failure(self):
# Test that the fault methods are invoked when an attach fails
db_instance = fake_instance.fake_db_instance()
--
2.4.5