| From 690c05ca495f1d55a469724c94e1551cbfa836f2 Mon Sep 17 00:00:00 2001 |
| From: Rajesh Tailor <rajesh.tailor@nttdata.com> |
| Date: Wed, 4 Mar 2015 05:05:19 -0800 |
| Subject: [PATCH] Delete orphaned instance files from compute nodes |
| |
| While resizing/revert-resizing instance, if instance gets deleted |
| in between, then instance files remains either on the source or |
| destination compute node. |
| |
| To address this issue, added a new periodic task |
| '_cleanup_incomplete_migrations' which takes care of deleting |
| instance files from source/destination compute nodes and then |
| mark migration record as failed so that it doesn't appear again |
| in the next periodic task run. |
| |
| SecurityImpact |
| |
| Closes-Bug: 1392527 |
| Change-Id: I9866d8e32e99b9f907921f4b226edf7b62bd83a7 |
| (cherry picked from commit 4655751cdd97a4b527a25c7c0a96044ba212cd19) |
| --- |
| nova/compute/manager.py | 61 ++++++++++++++++++++++-- |
| nova/tests/unit/compute/test_compute_mgr.py | 72 +++++++++++++++++++++++++++++ |
| 2 files changed, 129 insertions(+), 4 deletions(-) |
| |
| diff --git a/nova/compute/manager.py b/nova/compute/manager.py |
| index bf5585e..24a5811 100644 |
| --- a/nova/compute/manager.py |
| +++ b/nova/compute/manager.py |
| @@ -267,15 +267,21 @@ def errors_out_migration(function): |
| def decorated_function(self, context, *args, **kwargs): |
| try: |
| return function(self, context, *args, **kwargs) |
| - except Exception: |
| + except Exception as ex: |
| with excutils.save_and_reraise_exception(): |
| wrapped_func = utils.get_wrapped_function(function) |
| keyed_args = safe_utils.getcallargs(wrapped_func, context, |
| *args, **kwargs) |
| migration = keyed_args['migration'] |
| - status = migration.status |
| - if status not in ['migrating', 'post-migrating']: |
| - return |
| + |
| + # NOTE(rajesht): If InstanceNotFound error is thrown from |
| + # decorated function, migration status should be set to |
| + # 'error', without checking current migration status. |
| + if not isinstance(ex, exception.InstanceNotFound): |
| + status = migration.status |
| + if status not in ['migrating', 'post-migrating']: |
| + return |
| + |
| migration.status = 'error' |
| try: |
| with migration.obj_as_admin(): |
| @@ -3727,6 +3733,7 @@ class ComputeManager(manager.Manager): |
| @wrap_exception() |
| @reverts_task_state |
| @wrap_instance_event |
| + @errors_out_migration |
| @wrap_instance_fault |
| def revert_resize(self, context, instance, migration, reservations): |
| """Destroys the new instance on the destination machine. |
| @@ -3783,6 +3790,7 @@ class ComputeManager(manager.Manager): |
| @wrap_exception() |
| @reverts_task_state |
| @wrap_instance_event |
| + @errors_out_migration |
| @wrap_instance_fault |
| def finish_revert_resize(self, context, instance, reservations, migration): |
| """Finishes the second half of reverting a resize. |
| @@ -6578,6 +6586,51 @@ class ComputeManager(manager.Manager): |
| with utils.temporary_mutation(context, read_deleted='yes'): |
| instance.save() |
| |
| + @periodic_task.periodic_task(spacing=CONF.instance_delete_interval) |
| + def _cleanup_incomplete_migrations(self, context): |
| + """Delete instance files on failed resize/revert-resize operation |
| + |
| + During resize/revert-resize operation, if that instance gets deleted |
| + in-between then instance files might remain either on source or |
| + destination compute node because of race condition. |
| + """ |
| + LOG.debug('Cleaning up deleted instances with incomplete migration ') |
| + migration_filters = {'host': CONF.host, |
| + 'status': 'error'} |
| + migrations = objects.MigrationList.get_by_filters(context, |
| + migration_filters) |
| + |
| + if not migrations: |
| + return |
| + |
| + inst_uuid_from_migrations = set([migration.instance_uuid for migration |
| + in migrations]) |
| + |
| + inst_filters = {'deleted': True, 'soft_deleted': False, |
| + 'uuid': inst_uuid_from_migrations} |
| + attrs = ['info_cache', 'security_groups', 'system_metadata'] |
| + with utils.temporary_mutation(context, read_deleted='yes'): |
| + instances = objects.InstanceList.get_by_filters( |
| + context, inst_filters, expected_attrs=attrs, use_slave=True) |
| + |
| + for instance in instances: |
| + if instance.host != CONF.host: |
| + for migration in migrations: |
| + if instance.uuid == migration.instance_uuid: |
| + # Delete instance files if not cleanup properly either |
| + # from the source or destination compute nodes when |
| + # the instance is deleted during resizing. |
| + self.driver.delete_instance_files(instance) |
| + try: |
| + migration.status = 'failed' |
| + with migration.obj_as_admin(): |
| + migration.save() |
| + except exception.MigrationNotFound: |
| + LOG.warning(_LW("Migration %s is not found."), |
| + migration.id, context=context, |
| + instance=instance) |
| + break |
| + |
| @messaging.expected_exceptions(exception.InstanceQuiesceNotSupported, |
| exception.NovaException, |
| NotImplementedError) |
| diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py |
| index 4b7234e..ee1ab47 100644 |
| --- a/nova/tests/unit/compute/test_compute_mgr.py |
| +++ b/nova/tests/unit/compute/test_compute_mgr.py |
| @@ -1374,6 +1374,78 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): |
| self.assertFalse(c.cleaned) |
| self.assertEqual('1', c.system_metadata['clean_attempts']) |
| |
| + @mock.patch.object(objects.Migration, 'obj_as_admin') |
| + @mock.patch.object(objects.Migration, 'save') |
| + @mock.patch.object(objects.MigrationList, 'get_by_filters') |
| + @mock.patch.object(objects.InstanceList, 'get_by_filters') |
| + def _test_cleanup_incomplete_migrations(self, inst_host, |
| + mock_inst_get_by_filters, |
| + mock_migration_get_by_filters, |
| + mock_save, mock_obj_as_admin): |
| + def fake_inst(context, uuid, host): |
| + inst = objects.Instance(context) |
| + inst.uuid = uuid |
| + inst.host = host |
| + return inst |
| + |
| + def fake_migration(uuid, status, inst_uuid, src_host, dest_host): |
| + migration = objects.Migration() |
| + migration.uuid = uuid |
| + migration.status = status |
| + migration.instance_uuid = inst_uuid |
| + migration.source_compute = src_host |
| + migration.dest_compute = dest_host |
| + return migration |
| + |
| + fake_instances = [fake_inst(self.context, '111', inst_host), |
| + fake_inst(self.context, '222', inst_host)] |
| + |
| + fake_migrations = [fake_migration('123', 'error', '111', |
| + 'fake-host', 'fake-mini'), |
| + fake_migration('456', 'error', '222', |
| + 'fake-host', 'fake-mini')] |
| + |
| + mock_migration_get_by_filters.return_value = fake_migrations |
| + mock_inst_get_by_filters.return_value = fake_instances |
| + |
| + with mock.patch.object(self.compute.driver, 'delete_instance_files'): |
| + self.compute._cleanup_incomplete_migrations(self.context) |
| + |
| + # Ensure that migration status is set to 'failed' after instance |
| + # files deletion for those instances whose instance.host is not |
| + # same as compute host where periodic task is running. |
| + for inst in fake_instances: |
| + if inst.host != CONF.host: |
| + for mig in fake_migrations: |
| + if inst.uuid == mig.instance_uuid: |
| + self.assertEqual('failed', mig.status) |
| + |
| + def test_cleanup_incomplete_migrations_dest_node(self): |
| + """Test to ensure instance files are deleted from destination node. |
| + |
| + If instance gets deleted during resizing/revert-resizing operation, |
| + in that case instance files gets deleted from instance.host (source |
| + host here), but there is possibility that instance files could be |
| + present on destination node. |
| + This test ensures that `_cleanup_incomplete_migration` periodic |
| + task deletes orphaned instance files from destination compute node. |
| + """ |
| + self.flags(host='fake-mini') |
| + self._test_cleanup_incomplete_migrations('fake-host') |
| + |
| + def test_cleanup_incomplete_migrations_source_node(self): |
| + """Test to ensure instance files are deleted from source node. |
| + |
| + If instance gets deleted during resizing/revert-resizing operation, |
| + in that case instance files gets deleted from instance.host (dest |
| + host here), but there is possibility that instance files could be |
| + present on source node. |
| + This test ensures that `_cleanup_incomplete_migration` periodic |
| + task deletes orphaned instance files from source compute node. |
| + """ |
| + self.flags(host='fake-host') |
| + self._test_cleanup_incomplete_migrations('fake-mini') |
| + |
| def test_attach_interface_failure(self): |
| # Test that the fault methods are invoked when an attach fails |
| db_instance = fake_instance.fake_db_instance() |
| -- |
| 2.4.5 |
| |
| |