How to use instance method in autotest

Best Python code snippet using autotest_python

manager.py

Source:manager.py Github

copy

Full Screen

...345 instances.346 """347 @functools.wraps(function)348 def decorated_function(self, context, *args, **kwargs):349 def _load_instance(instance_or_dict):350 if isinstance(instance_or_dict, dict):351 instance = objects.Instance._from_db_object(352 context, objects.Instance(), instance_or_dict,353 expected_attrs=metas)354 instance._context = context355 return instance356 return instance_or_dict357 metas = ['metadata', 'system_metadata']358 try:359 kwargs['instance'] = _load_instance(kwargs['instance'])360 except KeyError:361 args = (_load_instance(args[0]),) + args[1:]362 migration = kwargs.get('migration')363 if isinstance(migration, dict):364 migration = objects.Migration._from_db_object(365 context.elevated(), objects.Migration(),366 migration)367 kwargs['migration'] = migration368 return function(self, context, *args, **kwargs)369 return decorated_function370# TODO(danms): Remove me after Icehouse371def aggregate_object_compat(function):372 """Wraps a method that expects a new-world aggregate."""373 @functools.wraps(function)374 def decorated_function(self, context, *args, **kwargs):375 aggregate = kwargs.get('aggregate')376 if isinstance(aggregate, dict):377 aggregate = objects.Aggregate._from_db_object(378 context.elevated(), objects.Aggregate(),379 aggregate)380 kwargs['aggregate'] = aggregate381 return function(self, context, *args, **kwargs)382 return decorated_function383class InstanceEvents(object):384 def __init__(self):385 self._events = {}386 @staticmethod387 def _lock_name(instance):388 return '%s-%s' % (instance.uuid, 'events')389 def prepare_for_instance_event(self, instance, event_name):390 """Prepare to receive an event for an instance.391 This will register an event for the given instance that we will392 wait on later. This should be called before initiating whatever393 action will trigger the event. The resulting eventlet.event.Event394 object should be wait()'d on to ensure completion.395 :param instance: the instance for which the event will be generated396 :param event_name: the name of the event we're expecting397 :returns: an event object that should be wait()'d on398 """399 @utils.synchronized(self._lock_name(instance))400 def _create_or_get_event():401 if instance.uuid not in self._events:402 self._events.setdefault(instance.uuid, {})403 return self._events[instance.uuid].setdefault(404 event_name, eventlet.event.Event())405 LOG.debug('Preparing to wait for external event %(event)s',406 {'event': event_name}, instance=instance)407 return _create_or_get_event()408 def pop_instance_event(self, instance, event):409 """Remove a pending event from the wait list.410 This will remove a pending event from the wait list so that it411 can be used to signal the waiters to wake up.412 :param instance: the instance for which the event was generated413 :param event: the nova.objects.external_event.InstanceExternalEvent414 that describes the event415 :returns: the eventlet.event.Event object on which the waiters416 are blocked417 """418 no_events_sentinel = object()419 no_matching_event_sentinel = object()420 @utils.synchronized(self._lock_name(instance))421 def _pop_event():422 events = self._events.get(instance.uuid)423 if not events:424 return no_events_sentinel425 _event = events.pop(event.key, None)426 if not events:427 del self._events[instance.uuid]428 if _event is None:429 return no_matching_event_sentinel430 return _event431 result = _pop_event()432 if result == no_events_sentinel:433 LOG.debug('No waiting events found dispatching %(event)s',434 {'event': event.key},435 instance=instance)436 return None437 elif result == no_matching_event_sentinel:438 LOG.debug('No event matching %(event)s in %(events)s',439 {'event': event.key,440 'events': self._events.get(instance.uuid, {}).keys()},441 instance=instance)442 return None443 else:444 return result445 def clear_events_for_instance(self, instance):446 """Remove all pending events for an instance.447 This will remove all events currently pending for an instance448 and return them (indexed by event name).449 :param instance: the instance for which events should be purged450 :returns: a dictionary of {event_name: eventlet.event.Event}451 """452 @utils.synchronized(self._lock_name(instance))453 def _clear_events():454 # NOTE(danms): Use getitem syntax for the instance until455 # all the callers are using objects456 return self._events.pop(instance['uuid'], {})457 return _clear_events()458class ComputeVirtAPI(virtapi.VirtAPI):459 def __init__(self, compute):460 super(ComputeVirtAPI, self).__init__()461 self._compute = compute462 def provider_fw_rule_get_all(self, context):463 return self._compute.conductor_api.provider_fw_rule_get_all(context)464 def _default_error_callback(self, event_name, instance):465 raise exception.NovaException(_('Instance event failed'))466 @contextlib.contextmanager467 def wait_for_instance_event(self, instance, event_names, deadline=300,468 error_callback=None):469 """Plan to wait for some events, run some code, then wait.470 This context manager will first create plans to wait for the471 provided event_names, yield, and then wait for all the scheduled472 events to complete.473 Note that this uses an eventlet.timeout.Timeout to bound the474 operation, so callers should be prepared to catch that475 failure and handle that situation appropriately.476 If the event is not received by the specified timeout deadline,477 eventlet.timeout.Timeout is raised.478 If the event is received but did not have a 'completed'479 status, a NovaException is raised. If an error_callback is480 provided, instead of raising an exception as detailed above481 for the failure case, the callback will be called with the482 event_name and instance, and can return True to continue483 waiting for the rest of the events, False to stop processing,484 or raise an exception which will bubble up to the waiter.485 :param instance: The instance for which an event is expected486 :param event_names: A list of event names. Each element can be a487 string event name or tuple of strings to488 indicate (name, tag).489 :param deadline: Maximum number of seconds we should wait for all490 of the specified events to arrive.491 :param error_callback: A function to be called if an event arrives492 """493 if error_callback is None:494 error_callback = self._default_error_callback495 events = {}496 for event_name in event_names:497 if isinstance(event_name, tuple):498 name, tag = event_name499 event_name = objects.InstanceExternalEvent.make_key(500 name, tag)501 events[event_name] = (502 self._compute.instance_events.prepare_for_instance_event(503 instance, event_name))504 yield505 with eventlet.timeout.Timeout(deadline):506 for event_name, event in events.items():507 actual_event = event.wait()508 if actual_event.status == 'completed':509 continue510 decision = error_callback(event_name, instance)511 if decision is False:512 break513class ComputeManager(manager.Manager):514 """Manages the running instances from creation to destruction."""515 target = messaging.Target(version='3.34')516 # How long to wait in seconds before re-issuing a shutdown517 # signal to a instance during power off. The overall518 # time to wait is set by CONF.shutdown_timeout.519 SHUTDOWN_RETRY_INTERVAL = 10520 def __init__(self, compute_driver=None, *args, **kwargs):521 """Load configuration options and connect to the hypervisor."""522 self.virtapi = ComputeVirtAPI(self)523 self.network_api = network.API()524 self.volume_api = volume.API()525 self.image_api = image.API()526 self._last_host_check = 0527 self._last_bw_usage_poll = 0528 self._bw_usage_supported = True529 self._last_bw_usage_cell_update = 0530 self.compute_api = compute.API()531 self.compute_rpcapi = compute_rpcapi.ComputeAPI()532 self.conductor_api = conductor.API()533 self.compute_task_api = conductor.ComputeTaskAPI()534 self.is_neutron_security_groups = (535 openstack_driver.is_neutron_security_groups())536 self.consoleauth_rpcapi = consoleauth.rpcapi.ConsoleAuthAPI()537 self.cells_rpcapi = cells_rpcapi.CellsAPI()538 self.scheduler_rpcapi = scheduler_rpcapi.SchedulerAPI()539 self._resource_tracker_dict = {}540 self.instance_events = InstanceEvents()541 self._sync_power_pool = eventlet.GreenPool()542 self._syncs_in_progress = {}543 super(ComputeManager, self).__init__(service_name="compute",544 *args, **kwargs)545 # NOTE(russellb) Load the driver last. It may call back into the546 # compute manager via the virtapi, so we want it to be fully547 # initialized before that happens.548 self.driver = driver.load_compute_driver(self.virtapi, compute_driver)549 self.use_legacy_block_device_info = \550 self.driver.need_legacy_block_device_info551 def _get_resource_tracker(self, nodename):552 rt = self._resource_tracker_dict.get(nodename)553 if not rt:554 if not self.driver.node_is_available(nodename):555 raise exception.NovaException(556 _("%s is not a valid node managed by this "557 "compute host.") % nodename)558 rt = resource_tracker.ResourceTracker(self.host,559 self.driver,560 nodename)561 self._resource_tracker_dict[nodename] = rt562 return rt563 def _update_resource_tracker(self, context, instance):564 """Let the resource tracker know that an instance has changed state."""565 if (instance['host'] == self.host and566 self.driver.node_is_available(instance['node'])):567 rt = self._get_resource_tracker(instance.get('node'))568 rt.update_usage(context, instance)569 def _instance_update(self, context, instance_uuid, **kwargs):570 """Update an instance in the database using kwargs as value."""571 instance_ref = self.conductor_api.instance_update(context,572 instance_uuid,573 **kwargs)574 self._update_resource_tracker(context, instance_ref)575 return instance_ref576 def _set_instance_error_state(self, context, instance):577 instance_uuid = instance['uuid']578 try:579 self._instance_update(context, instance_uuid,580 vm_state=vm_states.ERROR)581 except exception.InstanceNotFound:582 LOG.debug('Instance has been destroyed from under us while '583 'trying to set it to ERROR',584 instance_uuid=instance_uuid)585 def _set_instance_obj_error_state(self, context, instance):586 try:587 instance.vm_state = vm_states.ERROR588 instance.save()589 except exception.InstanceNotFound:590 LOG.debug('Instance has been destroyed from under us while '591 'trying to set it to ERROR', instance=instance)592 def _get_instances_on_driver(self, context, filters=None):593 """Return a list of instance records for the instances found594 on the hypervisor which satisfy the specified filters. If filters=None595 return a list of instance records for all the instances found on the596 hypervisor.597 """598 if not filters:599 filters = {}600 try:601 driver_uuids = self.driver.list_instance_uuids()602 filters['uuid'] = driver_uuids603 local_instances = objects.InstanceList.get_by_filters(604 context, filters, use_slave=True)605 return local_instances606 except NotImplementedError:607 pass608 # The driver doesn't support uuids listing, so we'll have609 # to brute force.610 driver_instances = self.driver.list_instances()611 instances = objects.InstanceList.get_by_filters(context, filters,612 use_slave=True)613 name_map = dict((instance.name, instance) for instance in instances)614 local_instances = []615 for driver_instance in driver_instances:616 instance = name_map.get(driver_instance)617 if not instance:618 continue619 local_instances.append(instance)620 return local_instances621 def _destroy_evacuated_instances(self, context):622 """Destroys evacuated instances.623 While nova-compute was down, the instances running on it could be624 evacuated to another host. Check that the instances reported625 by the driver are still associated with this host. If they are626 not, destroy them, with the exception of instances which are in627 the MIGRATING, RESIZE_MIGRATING, RESIZE_MIGRATED, RESIZE_FINISH628 task state or RESIZED vm state.629 """630 our_host = self.host631 filters = {'deleted': False}632 local_instances = self._get_instances_on_driver(context, filters)633 for instance in local_instances:634 if instance.host != our_host:635 if (instance.task_state in [task_states.MIGRATING,636 task_states.RESIZE_MIGRATING,637 task_states.RESIZE_MIGRATED,638 task_states.RESIZE_FINISH]639 or instance.vm_state in [vm_states.RESIZED]):640 LOG.debug('Will not delete instance as its host ('641 '%(instance_host)s) is not equal to our '642 'host (%(our_host)s) but its task state is '643 '(%(task_state)s) and vm state is '644 '(%(vm_state)s)',645 {'instance_host': instance.host,646 'our_host': our_host,647 'task_state': instance.task_state,648 'vm_state': instance.vm_state},649 instance=instance)650 continue651 LOG.info(_('Deleting instance as its host ('652 '%(instance_host)s) is not equal to our '653 'host (%(our_host)s).'),654 {'instance_host': instance.host,655 'our_host': our_host}, instance=instance)656 try:657 network_info = self._get_instance_nw_info(context,658 instance)659 bdi = self._get_instance_block_device_info(context,660 instance)661 destroy_disks = not (self._is_instance_storage_shared(662 context, instance))663 except exception.InstanceNotFound:664 network_info = network_model.NetworkInfo()665 bdi = {}666 LOG.info(_('Instance has been marked deleted already, '667 'removing it from the hypervisor.'),668 instance=instance)669 # always destroy disks if the instance was deleted670 destroy_disks = True671 self.driver.destroy(context, instance,672 network_info,673 bdi, destroy_disks)674 def _is_instance_storage_shared(self, context, instance):675 shared_storage = True676 data = None677 try:678 data = self.driver.check_instance_shared_storage_local(context,679 instance)680 if data:681 shared_storage = (self.compute_rpcapi.682 check_instance_shared_storage(context,683 instance, data))684 except NotImplementedError:685 LOG.warning(_('Hypervisor driver does not support '686 'instance shared storage check, '687 'assuming it\'s not on shared storage'),688 instance=instance)689 shared_storage = False690 except Exception:691 LOG.exception(_LE('Failed to check if instance shared'),692 instance=instance)693 finally:694 if data:695 self.driver.check_instance_shared_storage_cleanup(context,696 data)697 return shared_storage698 def _complete_partial_deletion(self, context, instance):699 """Complete deletion for instances in DELETED status but not marked as700 deleted in the DB701 """702 instance.destroy()703 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(704 context, instance.uuid)705 quotas = objects.Quotas(context)706 project_id, user_id = quotas_obj.ids_from_instance(context, instance)707 quotas.reserve(context, project_id=project_id, user_id=user_id,708 instances=-1, cores=-instance.vcpus,709 ram=-instance.memory_mb)710 self._complete_deletion(context,711 instance,712 bdms,713 quotas,714 instance.system_metadata)715 def _complete_deletion(self, context, instance, bdms,716 quotas, system_meta):717 if quotas:718 quotas.commit()719 # ensure block device mappings are not leaked720 for bdm in bdms:721 bdm.destroy()722 self._notify_about_instance_usage(context, instance, "delete.end",723 system_metadata=system_meta)724 if CONF.vnc_enabled or CONF.spice.enabled:725 if CONF.cells.enable:726 self.cells_rpcapi.consoleauth_delete_tokens(context,727 instance.uuid)728 else:729 self.consoleauth_rpcapi.delete_tokens_for_instance(context,730 instance.uuid)731 def _init_instance(self, context, instance):732 '''Initialize this instance during service init.'''733 # Instances that are shut down, or in an error state can not be734 # initialized and are not attempted to be recovered. The exception735 # to this are instances that are in RESIZE_MIGRATING or DELETING,736 # which are dealt with further down.737 if (instance.vm_state == vm_states.SOFT_DELETED or738 (instance.vm_state == vm_states.ERROR and739 instance.task_state not in740 (task_states.RESIZE_MIGRATING, task_states.DELETING))):741 LOG.debug("Instance is in %s state.",742 instance.vm_state, instance=instance)743 return744 if instance.vm_state == vm_states.DELETED:745 try:746 self._complete_partial_deletion(context, instance)747 except Exception:748 # we don't want that an exception blocks the init_host749 msg = _LE('Failed to complete a deletion')750 LOG.exception(msg, instance=instance)751 return752 if (instance.vm_state == vm_states.BUILDING or753 instance.task_state in [task_states.SCHEDULING,754 task_states.BLOCK_DEVICE_MAPPING,755 task_states.NETWORKING,756 task_states.SPAWNING]):757 # NOTE(dave-mcnally) compute stopped before instance was fully758 # spawned so set to ERROR state. This is safe to do as the state759 # may be set by the api but the host is not so if we get here the760 # instance has already been scheduled to this particular host.761 LOG.debug("Instance failed to spawn correctly, "762 "setting to ERROR state", instance=instance)763 instance.task_state = None764 instance.vm_state = vm_states.ERROR765 instance.save()766 return767 if (instance.vm_state in [vm_states.ACTIVE, vm_states.STOPPED] and768 instance.task_state in [task_states.REBUILDING,769 task_states.REBUILD_BLOCK_DEVICE_MAPPING,770 task_states.REBUILD_SPAWNING]):771 # NOTE(jichenjc) compute stopped before instance was fully772 # spawned so set to ERROR state. This is consistent to BUILD773 LOG.debug("Instance failed to rebuild correctly, "774 "setting to ERROR state", instance=instance)775 instance.task_state = None776 instance.vm_state = vm_states.ERROR777 instance.save()778 return779 if (instance.vm_state != vm_states.ERROR and780 instance.task_state in [task_states.IMAGE_SNAPSHOT_PENDING,781 task_states.IMAGE_PENDING_UPLOAD,782 task_states.IMAGE_UPLOADING,783 task_states.IMAGE_SNAPSHOT]):784 LOG.debug("Instance in transitional state %s at start-up "785 "clearing task state",786 instance['task_state'], instance=instance)787 try:788 self._post_interrupted_snapshot_cleanup(context, instance)789 except Exception:790 # we don't want that an exception blocks the init_host791 msg = _LE('Failed to cleanup snapshot.')792 LOG.exception(msg, instance=instance)793 instance.task_state = None794 instance.save()795 if instance.task_state == task_states.DELETING:796 try:797 LOG.info(_('Service started deleting the instance during '798 'the previous run, but did not finish. Restarting '799 'the deletion now.'), instance=instance)800 instance.obj_load_attr('metadata')801 instance.obj_load_attr('system_metadata')802 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(803 context, instance.uuid)804 # FIXME(comstud): This needs fixed. We should be creating805 # reservations and updating quotas, because quotas806 # wouldn't have been updated for this instance since it is807 # still in DELETING. See bug 1296414.808 #809 # Create a dummy quota object for now.810 quotas = objects.Quotas.from_reservations(811 context, None, instance=instance)812 self._delete_instance(context, instance, bdms, quotas)813 except Exception:814 # we don't want that an exception blocks the init_host815 msg = _LE('Failed to complete a deletion')816 LOG.exception(msg, instance=instance)817 self._set_instance_error_state(context, instance)818 return819 try_reboot, reboot_type = self._retry_reboot(context, instance)820 current_power_state = self._get_power_state(context, instance)821 if try_reboot:822 LOG.debug("Instance in transitional state (%(task_state)s) at "823 "start-up and power state is (%(power_state)s), "824 "triggering reboot",825 {'task_state': instance['task_state'],826 'power_state': current_power_state},827 instance=instance)828 self.compute_rpcapi.reboot_instance(context, instance,829 block_device_info=None,830 reboot_type=reboot_type)831 return832 elif (current_power_state == power_state.RUNNING and833 instance.task_state in [task_states.REBOOT_STARTED,834 task_states.REBOOT_STARTED_HARD]):835 LOG.warning(_("Instance in transitional state "836 "(%(task_state)s) at start-up and power state "837 "is (%(power_state)s), clearing task state"),838 {'task_state': instance['task_state'],839 'power_state': current_power_state},840 instance=instance)841 instance.task_state = None842 instance.vm_state = vm_states.ACTIVE843 instance.save()844 if instance.task_state == task_states.POWERING_OFF:845 try:846 LOG.debug("Instance in transitional state %s at start-up "847 "retrying stop request",848 instance['task_state'], instance=instance)849 self.stop_instance(context, instance)850 except Exception:851 # we don't want that an exception blocks the init_host852 msg = _LE('Failed to stop instance')853 LOG.exception(msg, instance=instance)854 return855 if instance.task_state == task_states.POWERING_ON:856 try:857 LOG.debug("Instance in transitional state %s at start-up "858 "retrying start request",859 instance['task_state'], instance=instance)860 self.start_instance(context, instance)861 except Exception:862 # we don't want that an exception blocks the init_host863 msg = _LE('Failed to start instance')864 LOG.exception(msg, instance=instance)865 return866 net_info = compute_utils.get_nw_info_for_instance(instance)867 try:868 self.driver.plug_vifs(instance, net_info)869 except NotImplementedError as e:870 LOG.debug(e, instance=instance)871 if instance.task_state == task_states.RESIZE_MIGRATING:872 # We crashed during resize/migration, so roll back for safety873 try:874 # NOTE(mriedem): check old_vm_state for STOPPED here, if it's875 # not in system_metadata we default to True for backwards876 # compatibility877 power_on = (instance.system_metadata.get('old_vm_state') !=878 vm_states.STOPPED)879 block_dev_info = self._get_instance_block_device_info(context,880 instance)881 self.driver.finish_revert_migration(context,882 instance, net_info, block_dev_info, power_on)883 except Exception as e:884 LOG.exception(_LE('Failed to revert crashed migration'),885 instance=instance)886 finally:887 LOG.info(_('Instance found in migrating state during '888 'startup. Resetting task_state'),889 instance=instance)890 instance.task_state = None891 instance.save()892 if instance.task_state == task_states.MIGRATING:893 # Live migration did not complete, but instance is on this894 # host, so reset the state.895 instance.task_state = None896 instance.save(expected_task_state=[task_states.MIGRATING])897 db_state = instance.power_state898 drv_state = self._get_power_state(context, instance)899 expect_running = (db_state == power_state.RUNNING and900 drv_state != db_state)901 LOG.debug('Current state is %(drv_state)s, state in DB is '902 '%(db_state)s.',903 {'drv_state': drv_state, 'db_state': db_state},904 instance=instance)905 if expect_running and CONF.resume_guests_state_on_host_boot:906 LOG.info(_('Rebooting instance after nova-compute restart.'),907 instance=instance)908 block_device_info = \909 self._get_instance_block_device_info(context, instance)910 try:911 self.driver.resume_state_on_host_boot(912 context, instance, net_info, block_device_info)913 except NotImplementedError:914 LOG.warning(_('Hypervisor driver does not support '915 'resume guests'), instance=instance)916 except Exception:917 # NOTE(vish): The instance failed to resume, so we set the918 # instance to error and attempt to continue.919 LOG.warning(_('Failed to resume instance'), instance=instance)920 self._set_instance_error_state(context, instance)921 elif drv_state == power_state.RUNNING:922 # VMwareAPI drivers will raise an exception923 try:924 self.driver.ensure_filtering_rules_for_instance(925 instance, net_info)926 except NotImplementedError:927 LOG.warning(_('Hypervisor driver does not support '928 'firewall rules'), instance=instance)929 def _retry_reboot(self, context, instance):930 current_power_state = self._get_power_state(context, instance)931 current_task_state = instance.task_state932 retry_reboot = False933 reboot_type = compute_utils.get_reboot_type(current_task_state,934 current_power_state)935 pending_soft = (current_task_state == task_states.REBOOT_PENDING and936 instance.vm_state in vm_states.ALLOW_SOFT_REBOOT)937 pending_hard = (current_task_state == task_states.REBOOT_PENDING_HARD938 and instance.vm_state in vm_states.ALLOW_HARD_REBOOT)939 started_not_running = (current_task_state in940 [task_states.REBOOT_STARTED,941 task_states.REBOOT_STARTED_HARD] and942 current_power_state != power_state.RUNNING)943 if pending_soft or pending_hard or started_not_running:944 retry_reboot = True945 return retry_reboot, reboot_type946 def handle_lifecycle_event(self, event):947 LOG.info(_("VM %(state)s (Lifecycle Event)") %948 {'state': event.get_name()},949 instance_uuid=event.get_instance_uuid())950 context = nova.context.get_admin_context(read_deleted='yes')951 instance = objects.Instance.get_by_uuid(context,952 event.get_instance_uuid())953 vm_power_state = None954 if event.get_transition() == virtevent.EVENT_LIFECYCLE_STOPPED:955 vm_power_state = power_state.SHUTDOWN956 elif event.get_transition() == virtevent.EVENT_LIFECYCLE_STARTED:957 vm_power_state = power_state.RUNNING958 elif event.get_transition() == virtevent.EVENT_LIFECYCLE_PAUSED:959 vm_power_state = power_state.PAUSED960 elif event.get_transition() == virtevent.EVENT_LIFECYCLE_RESUMED:961 vm_power_state = power_state.RUNNING962 else:963 LOG.warning(_("Unexpected power state %d") %964 event.get_transition())965 if vm_power_state is not None:966 LOG.debug('Synchronizing instance power state after lifecycle '967 'event "%(event)s"; current vm_state: %(vm_state)s, '968 'current task_state: %(task_state)s, current DB '969 'power_state: %(db_power_state)s, VM power_state: '970 '%(vm_power_state)s',971 dict(event=event.get_name(),972 vm_state=instance.vm_state,973 task_state=instance.task_state,974 db_power_state=instance.power_state,975 vm_power_state=vm_power_state),976 instance_uuid=instance.uuid)977 self._sync_instance_power_state(context,978 instance,979 vm_power_state)980 def handle_events(self, event):981 if isinstance(event, virtevent.LifecycleEvent):982 try:983 self.handle_lifecycle_event(event)984 except exception.InstanceNotFound:985 LOG.debug("Event %s arrived for non-existent instance. The "986 "instance was probably deleted.", event)987 else:988 LOG.debug("Ignoring event %s", event)989 def init_virt_events(self):990 self.driver.register_event_listener(self.handle_events)991 def init_host(self):992 """Initialization for a standalone compute service."""993 self.driver.init_host(host=self.host)994 context = nova.context.get_admin_context()995 instances = objects.InstanceList.get_by_host(996 context, self.host, expected_attrs=['info_cache'])997 if CONF.defer_iptables_apply:998 self.driver.filter_defer_apply_on()999 self.init_virt_events()1000 try:1001 # checking that instance was not already evacuated to other host1002 self._destroy_evacuated_instances(context)1003 for instance in instances:1004 self._init_instance(context, instance)1005 finally:1006 if CONF.defer_iptables_apply:1007 self.driver.filter_defer_apply_off()1008 def cleanup_host(self):1009 self.driver.cleanup_host(host=self.host)1010 def pre_start_hook(self):1011 """After the service is initialized, but before we fully bring1012 the service up by listening on RPC queues, make sure to update1013 our available resources (and indirectly our available nodes).1014 """1015 self.update_available_resource(nova.context.get_admin_context())1016 def _get_power_state(self, context, instance):1017 """Retrieve the power state for the given instance."""1018 LOG.debug('Checking state', instance=instance)1019 try:1020 return self.driver.get_info(instance)["state"]1021 except exception.NotFound:1022 return power_state.NOSTATE1023 def get_console_topic(self, context):1024 """Retrieves the console host for a project on this host.1025 Currently this is just set in the flags for each compute host.1026 """1027 # TODO(mdragon): perhaps make this variable by console_type?1028 return '%s.%s' % (CONF.console_topic, CONF.console_host)1029 def get_console_pool_info(self, context, console_type):1030 return self.driver.get_console_pool_info(console_type)1031 @wrap_exception()1032 def refresh_security_group_rules(self, context, security_group_id):1033 """Tell the virtualization driver to refresh security group rules.1034 Passes straight through to the virtualization driver.1035 """1036 return self.driver.refresh_security_group_rules(security_group_id)1037 @wrap_exception()1038 def refresh_security_group_members(self, context, security_group_id):1039 """Tell the virtualization driver to refresh security group members.1040 Passes straight through to the virtualization driver.1041 """1042 return self.driver.refresh_security_group_members(security_group_id)1043 @wrap_exception()1044 def refresh_instance_security_rules(self, context, instance):1045 """Tell the virtualization driver to refresh security rules for1046 an instance.1047 Passes straight through to the virtualization driver.1048 Synchronise the call because we may still be in the middle of1049 creating the instance.1050 """1051 @utils.synchronized(instance['uuid'])1052 def _sync_refresh():1053 try:1054 return self.driver.refresh_instance_security_rules(instance)1055 except NotImplementedError:1056 LOG.warning(_('Hypervisor driver does not support '1057 'security groups.'), instance=instance)1058 return _sync_refresh()1059 @wrap_exception()1060 def refresh_provider_fw_rules(self, context):1061 """This call passes straight through to the virtualization driver."""1062 return self.driver.refresh_provider_fw_rules()1063 def _get_instance_nw_info(self, context, instance, use_slave=False):1064 """Get a list of dictionaries of network data of an instance."""1065 if (not hasattr(instance, 'system_metadata') or1066 len(instance['system_metadata']) == 0):1067 # NOTE(danms): Several places in the code look up instances without1068 # pulling system_metadata for performance, and call this function.1069 # If we get an instance without it, re-fetch so that the call1070 # to network_api (which requires it for instance_type) will1071 # succeed.1072 instance = objects.Instance.get_by_uuid(context,1073 instance['uuid'],1074 use_slave=use_slave)1075 network_info = self.network_api.get_instance_nw_info(context,1076 instance)1077 return network_info1078 def _await_block_device_map_created(self, context, vol_id):1079 # TODO(yamahata): creating volume simultaneously1080 # reduces creation time?1081 # TODO(yamahata): eliminate dumb polling1082 start = time.time()1083 retries = CONF.block_device_allocate_retries1084 if retries < 0:1085 LOG.warn(_LW("Treating negative config value (%(retries)s) for "1086 "'block_device_retries' as 0."),1087 {'retries': retries})1088 # (1) treat negative config value as 01089 # (2) the configured value is 0, one attempt should be made1090 # (3) the configured value is > 0, then the total number attempts1091 # is (retries + 1)1092 attempts = 11093 if retries >= 1:1094 attempts = retries + 11095 for attempt in range(1, attempts + 1):1096 volume = self.volume_api.get(context, vol_id)1097 volume_status = volume['status']1098 if volume_status not in ['creating', 'downloading']:1099 if volume_status != 'available':1100 LOG.warn(_("Volume id: %s finished being created but was"1101 " not set as 'available'"), vol_id)1102 return attempt1103 greenthread.sleep(CONF.block_device_allocate_retries_interval)1104 # NOTE(harlowja): Should only happen if we ran out of attempts1105 raise exception.VolumeNotCreated(volume_id=vol_id,1106 seconds=int(time.time() - start),1107 attempts=attempts)1108 def _decode_files(self, injected_files):1109 """Base64 decode the list of files to inject."""1110 if not injected_files:1111 return []1112 def _decode(f):1113 path, contents = f1114 try:1115 decoded = base64.b64decode(contents)1116 return path, decoded1117 except TypeError:1118 raise exception.Base64Exception(path=path)1119 return [_decode(f) for f in injected_files]1120 def _run_instance(self, context, request_spec,1121 filter_properties, requested_networks, injected_files,1122 admin_password, is_first_time, node, instance,1123 legacy_bdm_in_spec):1124 """Launch a new instance with specified options."""1125 extra_usage_info = {}1126 def notify(status, msg="", fault=None, **kwargs):1127 """Send a create.{start,error,end} notification."""1128 type_ = "create.%(status)s" % dict(status=status)1129 info = extra_usage_info.copy()1130 info['message'] = msg1131 self._notify_about_instance_usage(context, instance, type_,1132 extra_usage_info=info, fault=fault, **kwargs)1133 try:1134 self._prebuild_instance(context, instance)1135 if request_spec and request_spec.get('image'):1136 image_meta = request_spec['image']1137 else:1138 image_meta = {}1139 extra_usage_info = {"image_name": image_meta.get('name', '')}1140 notify("start") # notify that build is starting1141 instance, network_info = self._build_instance(context,1142 request_spec, filter_properties, requested_networks,1143 injected_files, admin_password, is_first_time, node,1144 instance, image_meta, legacy_bdm_in_spec)1145 notify("end", msg=_("Success"), network_info=network_info)1146 except exception.RescheduledException as e:1147 # Instance build encountered an error, and has been rescheduled.1148 notify("error", fault=e)1149 except exception.BuildAbortException as e:1150 # Instance build aborted due to a non-failure1151 LOG.info(e)1152 notify("end", msg=e.format_message()) # notify that build is done1153 except Exception as e:1154 # Instance build encountered a non-recoverable error:1155 with excutils.save_and_reraise_exception():1156 self._set_instance_error_state(context, instance)1157 notify("error", fault=e) # notify that build failed1158 def _prebuild_instance(self, context, instance):1159 self._check_instance_exists(context, instance)1160 try:1161 self._start_building(context, instance)1162 except (exception.InstanceNotFound,1163 exception.UnexpectedDeletingTaskStateError):1164 msg = _("Instance disappeared before we could start it")1165 # Quickly bail out of here1166 raise exception.BuildAbortException(instance_uuid=instance.uuid,1167 reason=msg)1168 def _validate_instance_group_policy(self, context, instance,1169 filter_properties):1170 # NOTE(russellb) Instance group policy is enforced by the scheduler.1171 # However, there is a race condition with the enforcement of1172 # anti-affinity. Since more than one instance may be scheduled at the1173 # same time, it's possible that more than one instance with an1174 # anti-affinity policy may end up here. This is a validation step to1175 # make sure that starting the instance here doesn't violate the policy.1176 scheduler_hints = filter_properties.get('scheduler_hints') or {}1177 group_hint = scheduler_hints.get('group')1178 if not group_hint:1179 return1180 @utils.synchronized(group_hint)1181 def _do_validation(context, instance, group_hint):1182 group = objects.InstanceGroup.get_by_hint(context, group_hint)1183 if 'anti-affinity' not in group.policies:1184 return1185 group_hosts = group.get_hosts(context, exclude=[instance.uuid])1186 if self.host in group_hosts:1187 msg = _("Anti-affinity instance group policy was violated.")1188 raise exception.RescheduledException(1189 instance_uuid=instance.uuid,1190 reason=msg)1191 _do_validation(context, instance, group_hint)1192 def _build_instance(self, context, request_spec, filter_properties,1193 requested_networks, injected_files, admin_password, is_first_time,1194 node, instance, image_meta, legacy_bdm_in_spec):1195 original_context = context1196 context = context.elevated()1197 # NOTE(danms): This method is deprecated, but could be called,1198 # and if it is, it will have an old megatuple for requested_networks.1199 if requested_networks is not None:1200 requested_networks_obj = objects.NetworkRequestList(1201 objects=[objects.NetworkRequest.from_tuple(t)1202 for t in requested_networks])1203 else:1204 requested_networks_obj = None1205 # If neutron security groups pass requested security1206 # groups to allocate_for_instance()1207 if request_spec and self.is_neutron_security_groups:1208 security_groups = request_spec.get('security_group')1209 else:1210 security_groups = []1211 if node is None:1212 node = self.driver.get_available_nodes(refresh=True)[0]1213 LOG.debug("No node specified, defaulting to %s", node)1214 network_info = None1215 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(1216 context, instance.uuid)1217 # b64 decode the files to inject:1218 injected_files_orig = injected_files1219 injected_files = self._decode_files(injected_files)1220 rt = self._get_resource_tracker(node)1221 try:1222 limits = filter_properties.get('limits', {})1223 with rt.instance_claim(context, instance, limits):1224 # NOTE(russellb) It's important that this validation be done1225 # *after* the resource tracker instance claim, as that is where1226 # the host is set on the instance.1227 self._validate_instance_group_policy(context, instance,1228 filter_properties)1229 macs = self.driver.macs_for_instance(instance)1230 dhcp_options = self.driver.dhcp_options_for_instance(instance)1231 network_info = self._allocate_network(original_context,1232 instance, requested_networks_obj, macs,1233 security_groups, dhcp_options)1234 instance.vm_state = vm_states.BUILDING1235 instance.task_state = task_states.BLOCK_DEVICE_MAPPING1236 instance.save()1237 # Verify that all the BDMs have a device_name set and assign a1238 # default to the ones missing it with the help of the driver.1239 self._default_block_device_names(context, instance, image_meta,1240 bdms)1241 block_device_info = self._prep_block_device(1242 context, instance, bdms)1243 set_access_ip = (is_first_time and1244 not instance.access_ip_v4 and1245 not instance.access_ip_v6)1246 instance = self._spawn(context, instance, image_meta,1247 network_info, block_device_info,1248 injected_files, admin_password,1249 set_access_ip=set_access_ip)1250 except (exception.InstanceNotFound,1251 exception.UnexpectedDeletingTaskStateError):1252 # the instance got deleted during the spawn1253 # Make sure the async call finishes1254 if network_info is not None:1255 network_info.wait(do_raise=False)1256 try:1257 self._deallocate_network(context, instance)1258 except Exception:1259 msg = _LE('Failed to dealloc network '1260 'for deleted instance')1261 LOG.exception(msg, instance=instance)1262 raise exception.BuildAbortException(1263 instance_uuid=instance.uuid,1264 reason=_("Instance disappeared during build"))1265 except (exception.UnexpectedTaskStateError,1266 exception.VirtualInterfaceCreateException) as e:1267 # Don't try to reschedule, just log and reraise.1268 with excutils.save_and_reraise_exception():1269 LOG.debug(e.format_message(), instance=instance)1270 # Make sure the async call finishes1271 if network_info is not None:1272 network_info.wait(do_raise=False)1273 except exception.InvalidBDM:1274 with excutils.save_and_reraise_exception():1275 if network_info is not None:1276 network_info.wait(do_raise=False)1277 try:1278 self._deallocate_network(context, instance)1279 except Exception:1280 msg = _LE('Failed to dealloc network '1281 'for failed instance')1282 LOG.exception(msg, instance=instance)1283 except Exception:1284 exc_info = sys.exc_info()1285 # try to re-schedule instance:1286 # Make sure the async call finishes1287 if network_info is not None:1288 network_info.wait(do_raise=False)1289 rescheduled = self._reschedule_or_error(original_context, instance,1290 exc_info, requested_networks, admin_password,1291 injected_files_orig, is_first_time, request_spec,1292 filter_properties, bdms, legacy_bdm_in_spec)1293 if rescheduled:1294 # log the original build error1295 self._log_original_error(exc_info, instance.uuid)1296 raise exception.RescheduledException(1297 instance_uuid=instance.uuid,1298 reason=unicode(exc_info[1]))1299 else:1300 # not re-scheduling, go to error:1301 raise exc_info[0], exc_info[1], exc_info[2]1302 # spawn success1303 return instance, network_info1304 def _log_original_error(self, exc_info, instance_uuid):1305 LOG.error(_('Error: %s') % exc_info[1], instance_uuid=instance_uuid,1306 exc_info=exc_info)1307 def _reschedule_or_error(self, context, instance, exc_info,1308 requested_networks, admin_password, injected_files, is_first_time,1309 request_spec, filter_properties, bdms=None,1310 legacy_bdm_in_spec=True):1311 """Try to re-schedule the build or re-raise the original build error to1312 error out the instance.1313 """1314 original_context = context1315 context = context.elevated()1316 instance_uuid = instance['uuid']1317 rescheduled = False1318 compute_utils.add_instance_fault_from_exc(context,1319 instance, exc_info[1], exc_info=exc_info)1320 self._notify_about_instance_usage(context, instance,1321 'instance.create.error', fault=exc_info[1])1322 try:1323 LOG.debug("Clean up resource before rescheduling.",1324 instance=instance)1325 if bdms is None:1326 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(1327 context, instance.uuid)1328 self._shutdown_instance(context, instance,1329 bdms, requested_networks)1330 self._cleanup_volumes(context, instance['uuid'], bdms)1331 except Exception:1332 # do not attempt retry if clean up failed:1333 with excutils.save_and_reraise_exception():1334 self._log_original_error(exc_info, instance_uuid)1335 try:1336 method_args = (request_spec, admin_password, injected_files,1337 requested_networks, is_first_time, filter_properties,1338 legacy_bdm_in_spec)1339 task_state = task_states.SCHEDULING1340 rescheduled = self._reschedule(original_context, request_spec,1341 filter_properties, instance,1342 self.scheduler_rpcapi.run_instance, method_args,1343 task_state, exc_info)1344 except Exception:1345 rescheduled = False1346 LOG.exception(_LE("Error trying to reschedule"),1347 instance_uuid=instance_uuid)1348 return rescheduled1349 def _reschedule(self, context, request_spec, filter_properties,1350 instance, reschedule_method, method_args, task_state,1351 exc_info=None):1352 """Attempt to re-schedule a compute operation."""1353 instance_uuid = instance['uuid']1354 retry = filter_properties.get('retry', None)1355 if not retry:1356 # no retry information, do not reschedule.1357 LOG.debug("Retry info not present, will not reschedule",1358 instance_uuid=instance_uuid)1359 return1360 if not request_spec:1361 LOG.debug("No request spec, will not reschedule",1362 instance_uuid=instance_uuid)1363 return1364 request_spec['instance_uuids'] = [instance_uuid]1365 LOG.debug("Re-scheduling %(method)s: attempt %(num)d",1366 {'method': reschedule_method.func_name,1367 'num': retry['num_attempts']}, instance_uuid=instance_uuid)1368 # reset the task state:1369 self._instance_update(context, instance_uuid, task_state=task_state)1370 if exc_info:1371 # stringify to avoid circular ref problem in json serialization:1372 retry['exc'] = traceback.format_exception_only(exc_info[0],1373 exc_info[1])1374 reschedule_method(context, *method_args)1375 return True1376 @periodic_task.periodic_task1377 def _check_instance_build_time(self, context):1378 """Ensure that instances are not stuck in build."""1379 timeout = CONF.instance_build_timeout1380 if timeout == 0:1381 return1382 filters = {'vm_state': vm_states.BUILDING,1383 'host': self.host}1384 building_insts = objects.InstanceList.get_by_filters(context,1385 filters, expected_attrs=[], use_slave=True)1386 for instance in building_insts:1387 if timeutils.is_older_than(instance['created_at'], timeout):1388 self._set_instance_error_state(context, instance)1389 LOG.warn(_("Instance build timed out. Set to error state."),1390 instance=instance)1391 def _check_instance_exists(self, context, instance):1392 """Ensure an instance with the same name is not already present."""1393 if self.driver.instance_exists(instance):1394 raise exception.InstanceExists(name=instance.name)1395 def _start_building(self, context, instance):1396 """Save the host and launched_on fields and log appropriately."""1397 LOG.audit(_('Starting instance...'), context=context,1398 instance=instance)1399 self._instance_update(context, instance.uuid,1400 vm_state=vm_states.BUILDING,1401 task_state=None,1402 expected_task_state=(task_states.SCHEDULING,1403 None))1404 def _allocate_network_async(self, context, instance, requested_networks,1405 macs, security_groups, is_vpn, dhcp_options):1406 """Method used to allocate networks in the background.1407 Broken out for testing.1408 """1409 LOG.debug("Allocating IP information in the background.",1410 instance=instance)1411 retries = CONF.network_allocate_retries1412 if retries < 0:1413 LOG.warn(_("Treating negative config value (%(retries)s) for "1414 "'network_allocate_retries' as 0."),1415 {'retries': retries})1416 attempts = retries > 1 and retries + 1 or 11417 retry_time = 11418 for attempt in range(1, attempts + 1):1419 try:1420 nwinfo = self.network_api.allocate_for_instance(1421 context, instance, vpn=is_vpn,1422 requested_networks=requested_networks,1423 macs=macs,1424 security_groups=security_groups,1425 dhcp_options=dhcp_options)1426 LOG.debug('Instance network_info: |%s|', nwinfo,1427 instance=instance)1428 sys_meta = instance.system_metadata1429 sys_meta['network_allocated'] = 'True'1430 self._instance_update(context, instance.uuid,1431 system_metadata=sys_meta)1432 return nwinfo1433 except Exception:1434 exc_info = sys.exc_info()1435 log_info = {'attempt': attempt,1436 'attempts': attempts}1437 if attempt == attempts:1438 LOG.exception(_LE('Instance failed network setup '1439 'after %(attempts)d attempt(s)'),1440 log_info)1441 raise exc_info[0], exc_info[1], exc_info[2]1442 LOG.warn(_('Instance failed network setup '1443 '(attempt %(attempt)d of %(attempts)d)'),1444 log_info, instance=instance)1445 time.sleep(retry_time)1446 retry_time *= 21447 if retry_time > 30:1448 retry_time = 301449 # Not reached.1450 def _build_networks_for_instance(self, context, instance,1451 requested_networks, security_groups):1452 # If we're here from a reschedule the network may already be allocated.1453 if strutils.bool_from_string(1454 instance.system_metadata.get('network_allocated', 'False')):1455 return self._get_instance_nw_info(context, instance)1456 if not self.is_neutron_security_groups:1457 security_groups = []1458 macs = self.driver.macs_for_instance(instance)1459 dhcp_options = self.driver.dhcp_options_for_instance(instance)1460 network_info = self._allocate_network(context, instance,1461 requested_networks, macs, security_groups, dhcp_options)1462 if not instance.access_ip_v4 and not instance.access_ip_v6:1463 # If CONF.default_access_ip_network_name is set, grab the1464 # corresponding network and set the access ip values accordingly.1465 # Note that when there are multiple ips to choose from, an1466 # arbitrary one will be chosen.1467 network_name = CONF.default_access_ip_network_name1468 if not network_name:1469 return network_info1470 for vif in network_info:1471 if vif['network']['label'] == network_name:1472 for ip in vif.fixed_ips():1473 if ip['version'] == 4:1474 instance.access_ip_v4 = ip['address']1475 if ip['version'] == 6:1476 instance.access_ip_v6 = ip['address']1477 instance.save()1478 break1479 return network_info1480 def _allocate_network(self, context, instance, requested_networks, macs,1481 security_groups, dhcp_options):1482 """Start network allocation asynchronously. Return an instance1483 of NetworkInfoAsyncWrapper that can be used to retrieve the1484 allocated networks when the operation has finished.1485 """1486 # NOTE(comstud): Since we're allocating networks asynchronously,1487 # this task state has little meaning, as we won't be in this1488 # state for very long.1489 instance.vm_state = vm_states.BUILDING1490 instance.task_state = task_states.NETWORKING1491 instance.save(expected_task_state=[None])1492 self._update_resource_tracker(context, instance)1493 is_vpn = pipelib.is_vpn_image(instance.image_ref)1494 return network_model.NetworkInfoAsyncWrapper(1495 self._allocate_network_async, context, instance,1496 requested_networks, macs, security_groups, is_vpn,1497 dhcp_options)1498 def _default_root_device_name(self, instance, image_meta, root_bdm):1499 try:1500 return self.driver.default_root_device_name(instance,1501 image_meta,1502 root_bdm)1503 except NotImplementedError:1504 return compute_utils.get_next_device_name(instance, [])1505 def _default_device_names_for_instance(self, instance,1506 root_device_name,1507 *block_device_lists):1508 try:1509 self.driver.default_device_names_for_instance(instance,1510 root_device_name,1511 *block_device_lists)1512 except NotImplementedError:1513 compute_utils.default_device_names_for_instance(1514 instance, root_device_name, *block_device_lists)1515 def _default_block_device_names(self, context, instance,1516 image_meta, block_devices):1517 """Verify that all the devices have the device_name set. If not,1518 provide a default name.1519 It also ensures that there is a root_device_name and is set to the1520 first block device in the boot sequence (boot_index=0).1521 """1522 root_bdm = block_device.get_root_bdm(block_devices)1523 if not root_bdm:1524 return1525 # Get the root_device_name from the root BDM or the instance1526 root_device_name = None1527 update_instance = False1528 update_root_bdm = False1529 if root_bdm.device_name:1530 root_device_name = root_bdm.device_name1531 instance.root_device_name = root_device_name1532 update_instance = True1533 elif instance.root_device_name:1534 root_device_name = instance.root_device_name1535 root_bdm.device_name = root_device_name1536 update_root_bdm = True1537 else:1538 root_device_name = self._default_root_device_name(instance,1539 image_meta,1540 root_bdm)1541 instance.root_device_name = root_device_name1542 root_bdm.device_name = root_device_name1543 update_instance = update_root_bdm = True1544 if update_instance:1545 instance.save()1546 if update_root_bdm:1547 root_bdm.save()1548 ephemerals = filter(block_device.new_format_is_ephemeral,1549 block_devices)1550 swap = filter(block_device.new_format_is_swap,1551 block_devices)1552 block_device_mapping = filter(1553 driver_block_device.is_block_device_mapping, block_devices)1554 self._default_device_names_for_instance(instance,1555 root_device_name,1556 ephemerals,1557 swap,1558 block_device_mapping)1559 def _prep_block_device(self, context, instance, bdms,1560 do_check_attach=True):1561 """Set up the block device for an instance with error logging."""1562 try:1563 block_device_info = {1564 'root_device_name': instance['root_device_name'],1565 'swap': driver_block_device.convert_swap(bdms),1566 'ephemerals': driver_block_device.convert_ephemerals(bdms),1567 'block_device_mapping': (1568 driver_block_device.attach_block_devices(1569 driver_block_device.convert_volumes(bdms),1570 context, instance, self.volume_api,1571 self.driver, do_check_attach=do_check_attach) +1572 driver_block_device.attach_block_devices(1573 driver_block_device.convert_snapshots(bdms),1574 context, instance, self.volume_api,1575 self.driver, self._await_block_device_map_created,1576 do_check_attach=do_check_attach) +1577 driver_block_device.attach_block_devices(1578 driver_block_device.convert_images(bdms),1579 context, instance, self.volume_api,1580 self.driver, self._await_block_device_map_created,1581 do_check_attach=do_check_attach) +1582 driver_block_device.attach_block_devices(1583 driver_block_device.convert_blanks(bdms),1584 context, instance, self.volume_api,1585 self.driver, self._await_block_device_map_created,1586 do_check_attach=do_check_attach))1587 }1588 if self.use_legacy_block_device_info:1589 for bdm_type in ('swap', 'ephemerals', 'block_device_mapping'):1590 block_device_info[bdm_type] = \1591 driver_block_device.legacy_block_devices(1592 block_device_info[bdm_type])1593 # Get swap out of the list1594 block_device_info['swap'] = driver_block_device.get_swap(1595 block_device_info['swap'])1596 return block_device_info1597 except exception.OverQuota:1598 msg = _LW('Failed to create block device for instance due to '1599 'being over volume resource quota')1600 LOG.warn(msg, instance=instance)1601 raise exception.InvalidBDM()1602 except Exception:1603 LOG.exception(_LE('Instance failed block device setup'),1604 instance=instance)1605 raise exception.InvalidBDM()1606 @object_compat1607 def _spawn(self, context, instance, image_meta, network_info,1608 block_device_info, injected_files, admin_password,1609 set_access_ip=False):1610 """Spawn an instance with error logging and update its power state."""1611 instance.vm_state = vm_states.BUILDING1612 instance.task_state = task_states.SPAWNING1613 instance.save(expected_task_state=task_states.BLOCK_DEVICE_MAPPING)1614 try:1615 self.driver.spawn(context, instance, image_meta,1616 injected_files, admin_password,1617 network_info,1618 block_device_info)1619 except Exception:1620 with excutils.save_and_reraise_exception():1621 LOG.exception(_LE('Instance failed to spawn'),1622 instance=instance)1623 current_power_state = self._get_power_state(context, instance)1624 instance.power_state = current_power_state1625 instance.vm_state = vm_states.ACTIVE1626 instance.task_state = None1627 instance.launched_at = timeutils.utcnow()1628 def _set_access_ip_values():1629 """Add access ip values for a given instance.1630 If CONF.default_access_ip_network_name is set, this method will1631 grab the corresponding network and set the access ip values1632 accordingly. Note that when there are multiple ips to choose1633 from, an arbitrary one will be chosen.1634 """1635 network_name = CONF.default_access_ip_network_name1636 if not network_name:1637 return1638 for vif in network_info:1639 if vif['network']['label'] == network_name:1640 for ip in vif.fixed_ips():1641 if ip['version'] == 4:1642 instance.access_ip_v4 = ip['address']1643 if ip['version'] == 6:1644 instance.access_ip_v6 = ip['address']1645 return1646 if set_access_ip:1647 _set_access_ip_values()1648 network_info.wait(do_raise=True)1649 instance.info_cache.network_info = network_info1650 instance.save(expected_task_state=task_states.SPAWNING)1651 return instance1652 def _notify_about_instance_usage(self, context, instance, event_suffix,1653 network_info=None, system_metadata=None,1654 extra_usage_info=None, fault=None):1655 compute_utils.notify_about_instance_usage(1656 self.notifier, context, instance, event_suffix,1657 network_info=network_info,1658 system_metadata=system_metadata,1659 extra_usage_info=extra_usage_info, fault=fault)1660 def _deallocate_network(self, context, instance,1661 requested_networks=None):1662 LOG.debug('Deallocating network for instance', instance=instance)1663 self.network_api.deallocate_for_instance(1664 context, instance, requested_networks=requested_networks)1665 def _get_instance_block_device_info(self, context, instance,1666 refresh_conn_info=False,1667 bdms=None):1668 """Transform block devices to the driver block_device format."""1669 if not bdms:1670 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(1671 context, instance['uuid'])1672 swap = driver_block_device.convert_swap(bdms)1673 ephemerals = driver_block_device.convert_ephemerals(bdms)1674 block_device_mapping = (1675 driver_block_device.convert_volumes(bdms) +1676 driver_block_device.convert_snapshots(bdms) +1677 driver_block_device.convert_images(bdms))1678 if not refresh_conn_info:1679 # if the block_device_mapping has no value in connection_info1680 # (returned as None), don't include in the mapping1681 block_device_mapping = [1682 bdm for bdm in block_device_mapping1683 if bdm.get('connection_info')]1684 else:1685 block_device_mapping = driver_block_device.refresh_conn_infos(1686 block_device_mapping, context, instance, self.volume_api,1687 self.driver)1688 if self.use_legacy_block_device_info:1689 swap = driver_block_device.legacy_block_devices(swap)1690 ephemerals = driver_block_device.legacy_block_devices(ephemerals)1691 block_device_mapping = driver_block_device.legacy_block_devices(1692 block_device_mapping)1693 # Get swap out of the list1694 swap = driver_block_device.get_swap(swap)1695 return {'swap': swap,1696 'ephemerals': ephemerals,1697 'block_device_mapping': block_device_mapping}1698 # NOTE(mikal): No object_compat wrapper on this method because its1699 # callers all pass objects already1700 @wrap_exception()1701 @reverts_task_state1702 @wrap_instance_event1703 @wrap_instance_fault1704 def build_and_run_instance(self, context, instance, image, request_spec,1705 filter_properties, admin_password=None,1706 injected_files=None, requested_networks=None,1707 security_groups=None, block_device_mapping=None,1708 node=None, limits=None):1709 # NOTE(danms): Remove this in v4.0 of the RPC API1710 if (requested_networks and1711 not isinstance(requested_networks,1712 objects.NetworkRequestList)):1713 requested_networks = objects.NetworkRequestList(1714 objects=[objects.NetworkRequest.from_tuple(t)1715 for t in requested_networks])1716 @utils.synchronized(instance.uuid)1717 def do_build_and_run_instance(context, instance, image, request_spec,1718 filter_properties, admin_password, injected_files,1719 requested_networks, security_groups, block_device_mapping,1720 node=None, limits=None):1721 try:1722 LOG.audit(_('Starting instance...'), context=context,1723 instance=instance)1724 instance.vm_state = vm_states.BUILDING1725 instance.task_state = None1726 instance.save(expected_task_state=1727 (task_states.SCHEDULING, None))1728 except exception.InstanceNotFound:1729 msg = 'Instance disappeared before build.'1730 LOG.debug(msg, instance=instance)1731 return1732 except exception.UnexpectedTaskStateError as e:1733 LOG.debug(e.format_message(), instance=instance)1734 return1735 # b64 decode the files to inject:1736 decoded_files = self._decode_files(injected_files)1737 if limits is None:1738 limits = {}1739 if node is None:1740 node = self.driver.get_available_nodes(refresh=True)[0]1741 LOG.debug('No node specified, defaulting to %s', node,1742 instance=instance)1743 try:1744 self._build_and_run_instance(context, instance, image,1745 decoded_files, admin_password, requested_networks,1746 security_groups, block_device_mapping, node, limits,1747 filter_properties)1748 except exception.RescheduledException as e:1749 LOG.debug(e.format_message(), instance=instance)1750 retry = filter_properties.get('retry', None)1751 if not retry:1752 # no retry information, do not reschedule.1753 LOG.debug("Retry info not present, will not reschedule",1754 instance=instance)1755 self._cleanup_allocated_networks(context, instance,1756 requested_networks)1757 compute_utils.add_instance_fault_from_exc(context,1758 instance, e, sys.exc_info())1759 self._set_instance_error_state(context, instance)1760 return1761 retry['exc'] = traceback.format_exception(*sys.exc_info())1762 # NOTE(comstud): Deallocate networks if the driver wants1763 # us to do so.1764 if self.driver.deallocate_networks_on_reschedule(instance):1765 self._cleanup_allocated_networks(context, instance,1766 requested_networks)1767 instance.task_state = task_states.SCHEDULING1768 instance.save()1769 self.compute_task_api.build_instances(context, [instance],1770 image, filter_properties, admin_password,1771 injected_files, requested_networks, security_groups,1772 block_device_mapping)1773 except (exception.InstanceNotFound,1774 exception.UnexpectedDeletingTaskStateError):1775 msg = 'Instance disappeared during build.'1776 LOG.debug(msg, instance=instance)1777 self._cleanup_allocated_networks(context, instance,1778 requested_networks)1779 except exception.BuildAbortException as e:1780 LOG.exception(e.format_message(), instance=instance)1781 self._cleanup_allocated_networks(context, instance,1782 requested_networks)1783 self._cleanup_volumes(context, instance.uuid,1784 block_device_mapping, raise_exc=False)1785 compute_utils.add_instance_fault_from_exc(context, instance,1786 e, sys.exc_info())1787 self._set_instance_error_state(context, instance)1788 except Exception as e:1789 # Should not reach here.1790 msg = _LE('Unexpected build failure, not rescheduling build.')1791 LOG.exception(msg, instance=instance)1792 self._cleanup_allocated_networks(context, instance,1793 requested_networks)1794 self._cleanup_volumes(context, instance.uuid,1795 block_device_mapping, raise_exc=False)1796 compute_utils.add_instance_fault_from_exc(context, instance,1797 e, sys.exc_info())1798 self._set_instance_error_state(context, instance)1799 do_build_and_run_instance(context, instance, image, request_spec,1800 filter_properties, admin_password, injected_files,1801 requested_networks, security_groups, block_device_mapping,1802 node, limits)1803 def _build_and_run_instance(self, context, instance, image, injected_files,1804 admin_password, requested_networks, security_groups,1805 block_device_mapping, node, limits, filter_properties):1806 image_name = image.get('name')1807 self._notify_about_instance_usage(context, instance, 'create.start',1808 extra_usage_info={'image_name': image_name})1809 try:1810 rt = self._get_resource_tracker(node)1811 with rt.instance_claim(context, instance, limits):1812 # NOTE(russellb) It's important that this validation be done1813 # *after* the resource tracker instance claim, as that is where1814 # the host is set on the instance.1815 self._validate_instance_group_policy(context, instance,1816 filter_properties)1817 with self._build_resources(context, instance,1818 requested_networks, security_groups, image,1819 block_device_mapping) as resources:1820 instance.vm_state = vm_states.BUILDING1821 instance.task_state = task_states.SPAWNING1822 instance.save(expected_task_state=1823 task_states.BLOCK_DEVICE_MAPPING)1824 block_device_info = resources['block_device_info']1825 network_info = resources['network_info']1826 self.driver.spawn(context, instance, image,1827 injected_files, admin_password,1828 network_info=network_info,1829 block_device_info=block_device_info)1830 except (exception.InstanceNotFound,1831 exception.UnexpectedDeletingTaskStateError) as e:1832 with excutils.save_and_reraise_exception():1833 self._notify_about_instance_usage(context, instance,1834 'create.end', fault=e)1835 except exception.ComputeResourcesUnavailable as e:1836 LOG.debug(e.format_message(), instance=instance)1837 self._notify_about_instance_usage(context, instance,1838 'create.error', fault=e)1839 raise exception.RescheduledException(1840 instance_uuid=instance.uuid, reason=e.format_message())1841 except exception.BuildAbortException as e:1842 with excutils.save_and_reraise_exception():1843 LOG.debug(e.format_message(), instance=instance)1844 self._notify_about_instance_usage(context, instance,1845 'create.error', fault=e)1846 except (exception.FixedIpLimitExceeded,1847 exception.NoMoreNetworks) as e:1848 LOG.warn(_LW('No more network or fixed IP to be allocated'),1849 instance=instance)1850 self._notify_about_instance_usage(context, instance,1851 'create.error', fault=e)1852 msg = _('Failed to allocate the network(s) with error %s, '1853 'not rescheduling.') % e.format_message()1854 raise exception.BuildAbortException(instance_uuid=instance.uuid,1855 reason=msg)1856 except (exception.VirtualInterfaceCreateException,1857 exception.VirtualInterfaceMacAddressException) as e:1858 LOG.exception(_LE('Failed to allocate network(s)'),1859 instance=instance)1860 self._notify_about_instance_usage(context, instance,1861 'create.error', fault=e)1862 msg = _('Failed to allocate the network(s), not rescheduling.')1863 raise exception.BuildAbortException(instance_uuid=instance.uuid,1864 reason=msg)1865 except (exception.FlavorDiskTooSmall,1866 exception.FlavorMemoryTooSmall,1867 exception.ImageNotActive,1868 exception.ImageUnacceptable) as e:1869 self._notify_about_instance_usage(context, instance,1870 'create.error', fault=e)1871 raise exception.BuildAbortException(instance_uuid=instance.uuid,1872 reason=e.format_message())1873 except Exception as e:1874 self._notify_about_instance_usage(context, instance,1875 'create.error', fault=e)1876 raise exception.RescheduledException(1877 instance_uuid=instance.uuid, reason=six.text_type(e))1878 # NOTE(alaski): This is only useful during reschedules, remove it now.1879 instance.system_metadata.pop('network_allocated', None)1880 instance.power_state = self._get_power_state(context, instance)1881 instance.vm_state = vm_states.ACTIVE1882 instance.task_state = None1883 instance.launched_at = timeutils.utcnow()1884 try:1885 instance.save(expected_task_state=task_states.SPAWNING)1886 except (exception.InstanceNotFound,1887 exception.UnexpectedDeletingTaskStateError) as e:1888 with excutils.save_and_reraise_exception():1889 self._notify_about_instance_usage(context, instance,1890 'create.end', fault=e)1891 self._notify_about_instance_usage(context, instance, 'create.end',1892 extra_usage_info={'message': _('Success')},1893 network_info=network_info)1894 @contextlib.contextmanager1895 def _build_resources(self, context, instance, requested_networks,1896 security_groups, image, block_device_mapping):1897 resources = {}1898 network_info = None1899 try:1900 network_info = self._build_networks_for_instance(context, instance,1901 requested_networks, security_groups)1902 resources['network_info'] = network_info1903 except (exception.InstanceNotFound,1904 exception.UnexpectedDeletingTaskStateError):1905 raise1906 except exception.UnexpectedTaskStateError as e:1907 raise exception.BuildAbortException(instance_uuid=instance.uuid,1908 reason=e.format_message())1909 except Exception:1910 # Because this allocation is async any failures are likely to occur1911 # when the driver accesses network_info during spawn().1912 LOG.exception(_LE('Failed to allocate network(s)'),1913 instance=instance)1914 msg = _('Failed to allocate the network(s), not rescheduling.')1915 raise exception.BuildAbortException(instance_uuid=instance.uuid,1916 reason=msg)1917 try:1918 # Verify that all the BDMs have a device_name set and assign a1919 # default to the ones missing it with the help of the driver.1920 self._default_block_device_names(context, instance, image,1921 block_device_mapping)1922 instance.vm_state = vm_states.BUILDING1923 instance.task_state = task_states.BLOCK_DEVICE_MAPPING1924 instance.save()1925 block_device_info = self._prep_block_device(context, instance,1926 block_device_mapping)1927 resources['block_device_info'] = block_device_info1928 except (exception.InstanceNotFound,1929 exception.UnexpectedDeletingTaskStateError):1930 with excutils.save_and_reraise_exception() as ctxt:1931 # Make sure the async call finishes1932 if network_info is not None:1933 network_info.wait(do_raise=False)1934 except exception.UnexpectedTaskStateError as e:1935 # Make sure the async call finishes1936 if network_info is not None:1937 network_info.wait(do_raise=False)1938 raise exception.BuildAbortException(instance_uuid=instance.uuid,1939 reason=e.format_message())1940 except Exception:1941 LOG.exception(_LE('Failure prepping block device'),1942 instance=instance)1943 # Make sure the async call finishes1944 if network_info is not None:1945 network_info.wait(do_raise=False)1946 msg = _('Failure prepping block device.')1947 raise exception.BuildAbortException(instance_uuid=instance.uuid,1948 reason=msg)1949 try:1950 yield resources1951 except Exception as exc:1952 with excutils.save_and_reraise_exception() as ctxt:1953 if not isinstance(exc, (exception.InstanceNotFound,1954 exception.UnexpectedDeletingTaskStateError)):1955 LOG.exception(_LE('Instance failed to spawn'),1956 instance=instance)1957 # Make sure the async call finishes1958 if network_info is not None:1959 network_info.wait(do_raise=False)1960 try:1961 self._shutdown_instance(context, instance,1962 block_device_mapping, requested_networks,1963 try_deallocate_networks=False)1964 except Exception:1965 ctxt.reraise = False1966 msg = _('Could not clean up failed build,'1967 ' not rescheduling')1968 raise exception.BuildAbortException(1969 instance_uuid=instance.uuid, reason=msg)1970 def _cleanup_allocated_networks(self, context, instance,1971 requested_networks):1972 try:1973 self._deallocate_network(context, instance, requested_networks)1974 except Exception:1975 msg = _LE('Failed to deallocate networks')1976 LOG.exception(msg, instance=instance)1977 return1978 instance.system_metadata['network_allocated'] = 'False'1979 try:1980 instance.save()1981 except exception.InstanceNotFound:1982 # NOTE(alaski): It's possible that we're cleaning up the networks1983 # because the instance was deleted. If that's the case then this1984 # exception will be raised by instance.save()1985 pass1986 @object_compat1987 @messaging.expected_exceptions(exception.BuildAbortException,1988 exception.UnexpectedTaskStateError,1989 exception.VirtualInterfaceCreateException,1990 exception.RescheduledException)1991 @wrap_exception()1992 @reverts_task_state1993 @wrap_instance_event1994 @wrap_instance_fault1995 def run_instance(self, context, instance, request_spec,1996 filter_properties, requested_networks,1997 injected_files, admin_password,1998 is_first_time, node, legacy_bdm_in_spec):1999 # NOTE(alaski) This method should be deprecated when the scheduler and2000 # compute rpc interfaces are bumped to 4.x, and slated for removal in2001 # 5.x as it is no longer used.2002 if filter_properties is None:2003 filter_properties = {}2004 @utils.synchronized(instance.uuid)2005 def do_run_instance():2006 self._run_instance(context, request_spec,2007 filter_properties, requested_networks, injected_files,2008 admin_password, is_first_time, node, instance,2009 legacy_bdm_in_spec)2010 do_run_instance()2011 def _try_deallocate_network(self, context, instance,2012 requested_networks=None):2013 try:2014 # tear down allocated network structure2015 self._deallocate_network(context, instance, requested_networks)2016 except Exception:2017 with excutils.save_and_reraise_exception():2018 LOG.error(_('Failed to deallocate network for instance.'),2019 instance=instance)2020 self._set_instance_error_state(context, instance)2021 def _get_power_off_values(self, context, instance, clean_shutdown):2022 """Get the timing configuration for powering down this instance."""2023 if clean_shutdown:2024 timeout = compute_utils.get_value_from_system_metadata(instance,2025 key='image_os_shutdown_timeout', type=int,2026 default=CONF.shutdown_timeout)2027 retry_interval = self.SHUTDOWN_RETRY_INTERVAL2028 else:2029 timeout = 02030 retry_interval = 02031 return timeout, retry_interval2032 def _power_off_instance(self, context, instance, clean_shutdown=True):2033 """Power off an instance on this host."""2034 timeout, retry_interval = self._get_power_off_values(context,2035 instance, clean_shutdown)2036 self.driver.power_off(instance, timeout, retry_interval)2037 def _shutdown_instance(self, context, instance,2038 bdms, requested_networks=None, notify=True,2039 try_deallocate_networks=True):2040 """Shutdown an instance on this host.2041 :param:context: security context2042 :param:instance: a nova.objects.Instance object2043 :param:bdms: the block devices for the instance to be torn2044 down2045 :param:requested_networks: the networks on which the instance2046 has ports2047 :param:notify: true if a final usage notification should be2048 emitted2049 :param:try_deallocate_networks: false if we should avoid2050 trying to teardown networking2051 """2052 context = context.elevated()2053 LOG.audit(_('%(action_str)s instance') % {'action_str': 'Terminating'},2054 context=context, instance=instance)2055 if notify:2056 self._notify_about_instance_usage(context, instance,2057 "shutdown.start")2058 network_info = compute_utils.get_nw_info_for_instance(instance)2059 # NOTE(vish) get bdms before destroying the instance2060 vol_bdms = [bdm for bdm in bdms if bdm.is_volume]2061 block_device_info = self._get_instance_block_device_info(2062 context, instance, bdms=bdms)2063 # NOTE(melwitt): attempt driver destroy before releasing ip, may2064 # want to keep ip allocated for certain failures2065 try:2066 self.driver.destroy(context, instance, network_info,2067 block_device_info)2068 except exception.InstancePowerOffFailure:2069 # if the instance can't power off, don't release the ip2070 with excutils.save_and_reraise_exception():2071 pass2072 except Exception:2073 with excutils.save_and_reraise_exception():2074 # deallocate ip and fail without proceeding to2075 # volume api calls, preserving current behavior2076 if try_deallocate_networks:2077 self._try_deallocate_network(context, instance,2078 requested_networks)2079 if try_deallocate_networks:2080 self._try_deallocate_network(context, instance, requested_networks)2081 for bdm in vol_bdms:2082 try:2083 # NOTE(vish): actual driver detach done in driver.destroy, so2084 # just tell cinder that we are done with it.2085 connector = self.driver.get_volume_connector(instance)2086 self.volume_api.terminate_connection(context,2087 bdm.volume_id,2088 connector)2089 self.volume_api.detach(context, bdm.volume_id)2090 except exception.DiskNotFound as exc:2091 LOG.debug('Ignoring DiskNotFound: %s', exc,2092 instance=instance)2093 except exception.VolumeNotFound as exc:2094 LOG.debug('Ignoring VolumeNotFound: %s', exc,2095 instance=instance)2096 except cinder_exception.EndpointNotFound as exc:2097 LOG.warn(_LW('Ignoring EndpointNotFound: %s'), exc,2098 instance=instance)2099 if notify:2100 self._notify_about_instance_usage(context, instance,2101 "shutdown.end")2102 def _cleanup_volumes(self, context, instance_uuid, bdms, raise_exc=True):2103 exc_info = None2104 for bdm in bdms:2105 LOG.debug("terminating bdm %s", bdm,2106 instance_uuid=instance_uuid)2107 if bdm.volume_id and bdm.delete_on_termination:2108 try:2109 self.volume_api.delete(context, bdm.volume_id)2110 except Exception as exc:2111 exc_info = sys.exc_info()2112 LOG.warn(_LW('Failed to delete volume: %(volume_id)s due '2113 'to %(exc)s'), {'volume_id': bdm.volume_id,2114 'exc': unicode(exc)})2115 if exc_info is not None and raise_exc:2116 six.reraise(exc_info[0], exc_info[1], exc_info[2])2117 @hooks.add_hook("delete_instance")2118 def _delete_instance(self, context, instance, bdms, quotas):2119 """Delete an instance on this host. Commit or rollback quotas2120 as necessary.2121 """2122 instance_uuid = instance['uuid']2123 was_soft_deleted = instance['vm_state'] == vm_states.SOFT_DELETED2124 if was_soft_deleted:2125 # Instances in SOFT_DELETED vm_state have already had quotas2126 # decremented.2127 try:2128 quotas.rollback()2129 except Exception:2130 pass2131 try:2132 events = self.instance_events.clear_events_for_instance(instance)2133 if events:2134 LOG.debug('Events pending at deletion: %(events)s',2135 {'events': ','.join(events.keys())},2136 instance=instance)2137 instance.info_cache.delete()2138 self._notify_about_instance_usage(context, instance,2139 "delete.start")2140 self._shutdown_instance(context, instance, bdms)2141 # NOTE(vish): We have already deleted the instance, so we have2142 # to ignore problems cleaning up the volumes. It2143 # would be nice to let the user know somehow that2144 # the volume deletion failed, but it is not2145 # acceptable to have an instance that can not be2146 # deleted. Perhaps this could be reworked in the2147 # future to set an instance fault the first time2148 # and to only ignore the failure if the instance2149 # is already in ERROR.2150 self._cleanup_volumes(context, instance_uuid, bdms,2151 raise_exc=False)2152 # if a delete task succeed, always update vm state and task2153 # state without expecting task state to be DELETING2154 instance.vm_state = vm_states.DELETED2155 instance.task_state = None2156 instance.terminated_at = timeutils.utcnow()2157 instance.save()2158 self._update_resource_tracker(context, instance)2159 system_meta = instance.system_metadata2160 instance.destroy()2161 except Exception:2162 with excutils.save_and_reraise_exception():2163 quotas.rollback()2164 self._complete_deletion(context,2165 instance,2166 bdms,2167 quotas,2168 system_meta)2169 @wrap_exception()2170 @reverts_task_state2171 @wrap_instance_event2172 @wrap_instance_fault2173 def terminate_instance(self, context, instance, bdms, reservations):2174 """Terminate an instance on this host."""2175 # NOTE (ndipanov): If we get non-object BDMs, just get them from the2176 # db again, as this means they are sent in the old format and we want2177 # to avoid converting them back when we can just get them.2178 # Remove this when we bump the RPC major version to 4.02179 if (bdms and2180 any(not isinstance(bdm, obj_base.NovaObject)2181 for bdm in bdms)):2182 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(2183 context, instance.uuid)2184 quotas = objects.Quotas.from_reservations(context,2185 reservations,2186 instance=instance)2187 @utils.synchronized(instance['uuid'])2188 def do_terminate_instance(instance, bdms):2189 try:2190 self._delete_instance(context, instance, bdms, quotas)2191 except exception.InstanceNotFound:2192 LOG.info(_("Instance disappeared during terminate"),2193 instance=instance)2194 except Exception:2195 # As we're trying to delete always go to Error if something2196 # goes wrong that _delete_instance can't handle.2197 with excutils.save_and_reraise_exception():2198 LOG.exception(_LE('Setting instance vm_state to ERROR'),2199 instance=instance)2200 self._set_instance_error_state(context, instance)2201 do_terminate_instance(instance, bdms)2202 # NOTE(johannes): This is probably better named power_off_instance2203 # so it matches the driver method, but because of other issues, we2204 # can't use that name in grizzly.2205 @wrap_exception()2206 @reverts_task_state2207 @wrap_instance_event2208 @wrap_instance_fault2209 def stop_instance(self, context, instance, clean_shutdown=True):2210 """Stopping an instance on this host."""2211 @utils.synchronized(instance.uuid)2212 def do_stop_instance():2213 current_power_state = self._get_power_state(context, instance)2214 LOG.debug('Stopping instance; current vm_state: %(vm_state)s, '2215 'current task_state: %(task_state)s, current DB '2216 'power_state: %(db_power_state)s, current VM '2217 'power_state: %(current_power_state)s',2218 dict(vm_state=instance.vm_state,2219 task_state=instance.task_state,2220 db_power_state=instance.power_state,2221 current_power_state=current_power_state),2222 instance_uuid=instance.uuid)2223 # NOTE(mriedem): If the instance is already powered off, we are2224 # possibly tearing down and racing with other operations, so we can2225 # expect the task_state to be None if something else updates the2226 # instance and we're not locking it.2227 expected_task_state = [task_states.POWERING_OFF]2228 # The list of power states is from _sync_instance_power_state.2229 if current_power_state in (power_state.NOSTATE,2230 power_state.SHUTDOWN,2231 power_state.CRASHED):2232 LOG.info(_LI('Instance is already powered off in the '2233 'hypervisor when stop is called.'),2234 instance=instance)2235 expected_task_state.append(None)2236 self._notify_about_instance_usage(context, instance,2237 "power_off.start")2238 self._power_off_instance(context, instance, clean_shutdown)2239 current_power_state = self._get_power_state(context, instance)2240 instance.power_state = current_power_state2241 instance.vm_state = vm_states.STOPPED2242 instance.task_state = None2243 instance.save(expected_task_state=expected_task_state)2244 self._notify_about_instance_usage(context, instance,2245 "power_off.end")2246 do_stop_instance()2247 def _power_on(self, context, instance):2248 network_info = self._get_instance_nw_info(context, instance)2249 block_device_info = self._get_instance_block_device_info(context,2250 instance)2251 self.driver.power_on(context, instance,2252 network_info,2253 block_device_info)2254 # NOTE(johannes): This is probably better named power_on_instance2255 # so it matches the driver method, but because of other issues, we2256 # can't use that name in grizzly.2257 @wrap_exception()2258 @reverts_task_state2259 @wrap_instance_event2260 @wrap_instance_fault2261 def start_instance(self, context, instance):2262 """Starting an instance on this host."""2263 self._notify_about_instance_usage(context, instance, "power_on.start")2264 self._power_on(context, instance)2265 current_power_state = self._get_power_state(context, instance)2266 instance.power_state = current_power_state2267 instance.vm_state = vm_states.ACTIVE2268 instance.task_state = None2269 instance.save(expected_task_state=task_states.POWERING_ON)2270 self._notify_about_instance_usage(context, instance, "power_on.end")2271 @wrap_exception()2272 @reverts_task_state2273 @wrap_instance_event2274 @wrap_instance_fault2275 def soft_delete_instance(self, context, instance, reservations):2276 """Soft delete an instance on this host."""2277 quotas = objects.Quotas.from_reservations(context,2278 reservations,2279 instance=instance)2280 try:2281 self._notify_about_instance_usage(context, instance,2282 "soft_delete.start")2283 try:2284 self.driver.soft_delete(instance)2285 except NotImplementedError:2286 # Fallback to just powering off the instance if the2287 # hypervisor doesn't implement the soft_delete method2288 self.driver.power_off(instance)2289 current_power_state = self._get_power_state(context, instance)2290 instance.power_state = current_power_state2291 instance.vm_state = vm_states.SOFT_DELETED2292 instance.task_state = None2293 instance.save(expected_task_state=[task_states.SOFT_DELETING])2294 except Exception:2295 with excutils.save_and_reraise_exception():2296 quotas.rollback()2297 quotas.commit()2298 self._notify_about_instance_usage(context, instance, "soft_delete.end")2299 @object_compat2300 @wrap_exception()2301 @reverts_task_state2302 @wrap_instance_event2303 @wrap_instance_fault2304 def restore_instance(self, context, instance):2305 """Restore a soft-deleted instance on this host."""2306 self._notify_about_instance_usage(context, instance, "restore.start")2307 try:2308 self.driver.restore(instance)2309 except NotImplementedError:2310 # Fallback to just powering on the instance if the hypervisor2311 # doesn't implement the restore method2312 self._power_on(context, instance)2313 current_power_state = self._get_power_state(context, instance)2314 instance.power_state = current_power_state2315 instance.vm_state = vm_states.ACTIVE2316 instance.task_state = None2317 instance.save(expected_task_state=task_states.RESTORING)2318 self._notify_about_instance_usage(context, instance, "restore.end")2319 def _rebuild_default_impl(self, context, instance, image_meta,2320 injected_files, admin_password, bdms,2321 detach_block_devices, attach_block_devices,2322 network_info=None,2323 recreate=False, block_device_info=None,2324 preserve_ephemeral=False):2325 if preserve_ephemeral:2326 # The default code path does not support preserving ephemeral2327 # partitions.2328 raise exception.PreserveEphemeralNotSupported()2329 detach_block_devices(context, bdms)2330 if not recreate:2331 self.driver.destroy(context, instance, network_info,2332 block_device_info=block_device_info)2333 instance.task_state = task_states.REBUILD_BLOCK_DEVICE_MAPPING2334 instance.save(expected_task_state=[task_states.REBUILDING])2335 new_block_device_info = attach_block_devices(context, instance, bdms)2336 instance.task_state = task_states.REBUILD_SPAWNING2337 instance.save(2338 expected_task_state=[task_states.REBUILD_BLOCK_DEVICE_MAPPING])2339 self.driver.spawn(context, instance, image_meta, injected_files,2340 admin_password, network_info=network_info,2341 block_device_info=new_block_device_info)2342 @object_compat2343 @messaging.expected_exceptions(exception.PreserveEphemeralNotSupported)2344 @wrap_exception()2345 @reverts_task_state2346 @wrap_instance_event2347 @wrap_instance_fault2348 def rebuild_instance(self, context, instance, orig_image_ref, image_ref,2349 injected_files, new_pass, orig_sys_metadata,2350 bdms, recreate, on_shared_storage,2351 preserve_ephemeral=False):2352 """Destroy and re-make this instance.2353 A 'rebuild' effectively purges all existing data from the system and2354 remakes the VM with given 'metadata' and 'personalities'.2355 :param context: `nova.RequestContext` object2356 :param instance: Instance object2357 :param orig_image_ref: Original image_ref before rebuild2358 :param image_ref: New image_ref for rebuild2359 :param injected_files: Files to inject2360 :param new_pass: password to set on rebuilt instance2361 :param orig_sys_metadata: instance system metadata from pre-rebuild2362 :param bdms: block-device-mappings to use for rebuild2363 :param recreate: True if the instance is being recreated (e.g. the2364 hypervisor it was on failed) - cleanup of old state will be2365 skipped.2366 :param on_shared_storage: True if instance files on shared storage2367 :param preserve_ephemeral: True if the default ephemeral storage2368 partition must be preserved on rebuild2369 """2370 context = context.elevated()2371 # NOTE (ndipanov): If we get non-object BDMs, just get them from the2372 # db again, as this means they are sent in the old format and we want2373 # to avoid converting them back when we can just get them.2374 # Remove this on the next major RPC version bump2375 if (bdms and2376 any(not isinstance(bdm, obj_base.NovaObject)2377 for bdm in bdms)):2378 bdms = None2379 orig_vm_state = instance.vm_state2380 with self._error_out_instance_on_exception(context, instance):2381 LOG.audit(_("Rebuilding instance"), context=context,2382 instance=instance)2383 if recreate:2384 if not self.driver.capabilities["supports_recreate"]:2385 raise exception.InstanceRecreateNotSupported2386 self._check_instance_exists(context, instance)2387 # To cover case when admin expects that instance files are on2388 # shared storage, but not accessible and vice versa2389 if on_shared_storage != self.driver.instance_on_disk(instance):2390 raise exception.InvalidSharedStorage(2391 _("Invalid state of instance files on shared"2392 " storage"))2393 if on_shared_storage:2394 LOG.info(_('disk on shared storage, recreating using'2395 ' existing disk'))2396 else:2397 image_ref = orig_image_ref = instance.image_ref2398 LOG.info(_("disk not on shared storage, rebuilding from:"2399 " '%s'") % str(image_ref))2400 # NOTE(mriedem): On a recreate (evacuate), we need to update2401 # the instance's host and node properties to reflect it's2402 # destination node for the recreate.2403 node_name = None2404 try:2405 compute_node = self._get_compute_info(context, self.host)2406 node_name = compute_node.hypervisor_hostname2407 except exception.NotFound:2408 LOG.exception(_LE('Failed to get compute_info for %s'),2409 self.host)2410 finally:2411 instance.host = self.host2412 instance.node = node_name2413 instance.save()2414 if image_ref:2415 image_meta = self.image_api.get(context, image_ref)2416 else:2417 image_meta = {}2418 # This instance.exists message should contain the original2419 # image_ref, not the new one. Since the DB has been updated2420 # to point to the new one... we have to override it.2421 # TODO(jaypipes): Move generate_image_url() into the nova.image.api2422 orig_image_ref_url = glance.generate_image_url(orig_image_ref)2423 extra_usage_info = {'image_ref_url': orig_image_ref_url}2424 self.conductor_api.notify_usage_exists(context,2425 obj_base.obj_to_primitive(instance),2426 current_period=True, system_metadata=orig_sys_metadata,2427 extra_usage_info=extra_usage_info)2428 # This message should contain the new image_ref2429 extra_usage_info = {'image_name': image_meta.get('name', '')}2430 self._notify_about_instance_usage(context, instance,2431 "rebuild.start", extra_usage_info=extra_usage_info)2432 instance.power_state = self._get_power_state(context, instance)2433 instance.task_state = task_states.REBUILDING2434 instance.save(expected_task_state=[task_states.REBUILDING])2435 if recreate:2436 self.network_api.setup_networks_on_host(2437 context, instance, self.host)2438 network_info = compute_utils.get_nw_info_for_instance(instance)2439 if bdms is None:2440 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(2441 context, instance.uuid)2442 block_device_info = \2443 self._get_instance_block_device_info(2444 context, instance, bdms=bdms)2445 def detach_block_devices(context, bdms):2446 for bdm in bdms:2447 if bdm.is_volume:2448 self.volume_api.detach(context, bdm.volume_id)2449 files = self._decode_files(injected_files)2450 kwargs = dict(2451 context=context,2452 instance=instance,2453 image_meta=image_meta,2454 injected_files=files,2455 admin_password=new_pass,2456 bdms=bdms,2457 detach_block_devices=detach_block_devices,2458 attach_block_devices=self._prep_block_device,2459 block_device_info=block_device_info,2460 network_info=network_info,2461 preserve_ephemeral=preserve_ephemeral)2462 try:2463 self.driver.rebuild(**kwargs)2464 except NotImplementedError:2465 # NOTE(rpodolyaka): driver doesn't provide specialized version2466 # of rebuild, fall back to the default implementation2467 self._rebuild_default_impl(**kwargs)2468 instance.power_state = self._get_power_state(context, instance)2469 instance.vm_state = vm_states.ACTIVE2470 instance.task_state = None2471 instance.launched_at = timeutils.utcnow()2472 instance.save(expected_task_state=[task_states.REBUILD_SPAWNING])2473 if orig_vm_state == vm_states.STOPPED:2474 LOG.info(_LI("bringing vm to original state: '%s'"),2475 orig_vm_state, instance=instance)2476 instance.vm_state = vm_states.ACTIVE2477 instance.task_state = task_states.POWERING_OFF2478 instance.progress = 02479 instance.save()2480 self.stop_instance(context, instance)2481 self._notify_about_instance_usage(2482 context, instance, "rebuild.end",2483 network_info=network_info,2484 extra_usage_info=extra_usage_info)2485 def _handle_bad_volumes_detached(self, context, instance, bad_devices,2486 block_device_info):2487 """Handle cases where the virt-layer had to detach non-working volumes2488 in order to complete an operation.2489 """2490 for bdm in block_device_info['block_device_mapping']:2491 if bdm.get('mount_device') in bad_devices:2492 try:2493 volume_id = bdm['connection_info']['data']['volume_id']2494 except KeyError:2495 continue2496 # NOTE(sirp): ideally we'd just call2497 # `compute_api.detach_volume` here but since that hits the2498 # DB directly, that's off limits from within the2499 # compute-manager.2500 #2501 # API-detach2502 LOG.info(_("Detaching from volume api: %s") % volume_id)2503 volume = self.volume_api.get(context, volume_id)2504 self.volume_api.check_detach(context, volume)2505 self.volume_api.begin_detaching(context, volume_id)2506 # Manager-detach2507 self.detach_volume(context, volume_id, instance)2508 @wrap_exception()2509 @reverts_task_state2510 @wrap_instance_event2511 @wrap_instance_fault2512 def reboot_instance(self, context, instance, block_device_info,2513 reboot_type):2514 """Reboot an instance on this host."""2515 # acknowledge the request made it to the manager2516 if reboot_type == "SOFT":2517 instance.task_state = task_states.REBOOT_PENDING2518 expected_states = (task_states.REBOOTING,2519 task_states.REBOOT_PENDING,2520 task_states.REBOOT_STARTED)2521 else:2522 instance.task_state = task_states.REBOOT_PENDING_HARD2523 expected_states = (task_states.REBOOTING_HARD,2524 task_states.REBOOT_PENDING_HARD,2525 task_states.REBOOT_STARTED_HARD)2526 context = context.elevated()2527 LOG.audit(_("Rebooting instance"), context=context, instance=instance)2528 block_device_info = self._get_instance_block_device_info(context,2529 instance)2530 network_info = self._get_instance_nw_info(context, instance)2531 self._notify_about_instance_usage(context, instance, "reboot.start")2532 current_power_state = self._get_power_state(context, instance)2533 instance.power_state = current_power_state2534 instance.save(expected_task_state=expected_states)2535 if instance['power_state'] != power_state.RUNNING:2536 state = instance['power_state']2537 running = power_state.RUNNING2538 LOG.warn(_('trying to reboot a non-running instance:'2539 ' (state: %(state)s expected: %(running)s)'),2540 {'state': state, 'running': running},2541 context=context, instance=instance)2542 def bad_volumes_callback(bad_devices):2543 self._handle_bad_volumes_detached(2544 context, instance, bad_devices, block_device_info)2545 try:2546 # Don't change it out of rescue mode2547 if instance['vm_state'] == vm_states.RESCUED:2548 new_vm_state = vm_states.RESCUED2549 else:2550 new_vm_state = vm_states.ACTIVE2551 new_power_state = None2552 if reboot_type == "SOFT":2553 instance.task_state = task_states.REBOOT_STARTED2554 expected_state = task_states.REBOOT_PENDING2555 else:2556 instance.task_state = task_states.REBOOT_STARTED_HARD2557 expected_state = task_states.REBOOT_PENDING_HARD2558 instance.save(expected_task_state=expected_state)2559 self.driver.reboot(context, instance,2560 network_info,2561 reboot_type,2562 block_device_info=block_device_info,2563 bad_volumes_callback=bad_volumes_callback)2564 except Exception as error:2565 with excutils.save_and_reraise_exception() as ctxt:2566 exc_info = sys.exc_info()2567 # if the reboot failed but the VM is running don't2568 # put it into an error state2569 new_power_state = self._get_power_state(context, instance)2570 if new_power_state == power_state.RUNNING:2571 LOG.warning(_('Reboot failed but instance is running'),2572 context=context, instance=instance)2573 compute_utils.add_instance_fault_from_exc(context,2574 instance, error, exc_info)2575 self._notify_about_instance_usage(context, instance,2576 'reboot.error', fault=error)2577 ctxt.reraise = False2578 else:2579 LOG.error(_('Cannot reboot instance: %s'), error,2580 context=context, instance=instance)2581 self._set_instance_obj_error_state(context, instance)2582 if not new_power_state:2583 new_power_state = self._get_power_state(context, instance)2584 try:2585 instance.power_state = new_power_state2586 instance.vm_state = new_vm_state2587 instance.task_state = None2588 instance.save()2589 except exception.InstanceNotFound:2590 LOG.warn(_("Instance disappeared during reboot"),2591 context=context, instance=instance)2592 self._notify_about_instance_usage(context, instance, "reboot.end")2593 @delete_image_on_error2594 def _do_snapshot_instance(self, context, image_id, instance, rotation):2595 if rotation < 0:2596 raise exception.RotationRequiredForBackup()2597 self._snapshot_instance(context, image_id, instance,2598 task_states.IMAGE_BACKUP)2599 @wrap_exception()2600 @reverts_task_state2601 @wrap_instance_fault2602 def backup_instance(self, context, image_id, instance, backup_type,2603 rotation):2604 """Backup an instance on this host.2605 :param backup_type: daily | weekly2606 :param rotation: int representing how many backups to keep around2607 """2608 self._do_snapshot_instance(context, image_id, instance, rotation)2609 self._rotate_backups(context, instance, backup_type, rotation)2610 @wrap_exception()2611 @reverts_task_state2612 @wrap_instance_fault2613 @delete_image_on_error2614 def snapshot_instance(self, context, image_id, instance):2615 """Snapshot an instance on this host.2616 :param context: security context2617 :param instance: a nova.objects.instance.Instance object2618 :param image_id: glance.db.sqlalchemy.models.Image.Id2619 """2620 # NOTE(dave-mcnally) the task state will already be set by the api2621 # but if the compute manager has crashed/been restarted prior to the2622 # request getting here the task state may have been cleared so we set2623 # it again and things continue normally2624 try:2625 instance.task_state = task_states.IMAGE_SNAPSHOT2626 instance.save(2627 expected_task_state=task_states.IMAGE_SNAPSHOT_PENDING)2628 except exception.InstanceNotFound:2629 # possibility instance no longer exists, no point in continuing2630 LOG.debug("Instance not found, could not set state %s "2631 "for instance.",2632 task_states.IMAGE_SNAPSHOT, instance=instance)2633 return2634 except exception.UnexpectedDeletingTaskStateError:2635 LOG.debug("Instance being deleted, snapshot cannot continue",2636 instance=instance)2637 return2638 self._snapshot_instance(context, image_id, instance,2639 task_states.IMAGE_SNAPSHOT)2640 def _snapshot_instance(self, context, image_id, instance,2641 expected_task_state):2642 context = context.elevated()2643 current_power_state = self._get_power_state(context, instance)2644 try:2645 instance.power_state = current_power_state2646 instance.save()2647 LOG.audit(_('instance snapshotting'), context=context,2648 instance=instance)2649 if instance.power_state != power_state.RUNNING:2650 state = instance.power_state2651 running = power_state.RUNNING2652 LOG.warn(_('trying to snapshot a non-running instance: '2653 '(state: %(state)s expected: %(running)s)'),2654 {'state': state, 'running': running},2655 instance=instance)2656 self._notify_about_instance_usage(2657 context, instance, "snapshot.start")2658 def update_task_state(task_state,2659 expected_state=expected_task_state):2660 instance.task_state = task_state2661 instance.save(expected_task_state=expected_state)2662 self.driver.snapshot(context, instance, image_id,2663 update_task_state)2664 instance.task_state = None2665 instance.save(expected_task_state=task_states.IMAGE_UPLOADING)2666 self._notify_about_instance_usage(context, instance,2667 "snapshot.end")2668 except (exception.InstanceNotFound,2669 exception.UnexpectedDeletingTaskStateError):2670 # the instance got deleted during the snapshot2671 # Quickly bail out of here2672 msg = 'Instance disappeared during snapshot'2673 LOG.debug(msg, instance=instance)2674 try:2675 image_service = glance.get_default_image_service()2676 image = image_service.show(context, image_id)2677 if image['status'] != 'active':2678 image_service.delete(context, image_id)2679 except Exception:2680 LOG.warning(_("Error while trying to clean up image %s"),2681 image_id, instance=instance)2682 except exception.ImageNotFound:2683 instance.task_state = None2684 instance.save()2685 msg = _("Image not found during snapshot")2686 LOG.warn(msg, instance=instance)2687 def _post_interrupted_snapshot_cleanup(self, context, instance):2688 self.driver.post_interrupted_snapshot_cleanup(context, instance)2689 @object_compat2690 @messaging.expected_exceptions(NotImplementedError)2691 def volume_snapshot_create(self, context, instance, volume_id,2692 create_info):2693 self.driver.volume_snapshot_create(context, instance, volume_id,2694 create_info)2695 @object_compat2696 @messaging.expected_exceptions(NotImplementedError)2697 def volume_snapshot_delete(self, context, instance, volume_id,2698 snapshot_id, delete_info):2699 self.driver.volume_snapshot_delete(context, instance, volume_id,2700 snapshot_id, delete_info)2701 @wrap_instance_fault2702 def _rotate_backups(self, context, instance, backup_type, rotation):2703 """Delete excess backups associated to an instance.2704 Instances are allowed a fixed number of backups (the rotation number);2705 this method deletes the oldest backups that exceed the rotation2706 threshold.2707 :param context: security context2708 :param instance: Instance dict2709 :param backup_type: daily | weekly2710 :param rotation: int representing how many backups to keep around;2711 None if rotation shouldn't be used (as in the case of snapshots)2712 """2713 filters = {'property-image_type': 'backup',2714 'property-backup_type': backup_type,2715 'property-instance_uuid': instance.uuid}2716 images = self.image_api.get_all(context, filters=filters,2717 sort_key='created_at', sort_dir='desc')2718 num_images = len(images)2719 LOG.debug("Found %(num_images)d images (rotation: %(rotation)d)",2720 {'num_images': num_images, 'rotation': rotation},2721 instance=instance)2722 if num_images > rotation:2723 # NOTE(sirp): this deletes all backups that exceed the rotation2724 # limit2725 excess = len(images) - rotation2726 LOG.debug("Rotating out %d backups", excess,2727 instance=instance)2728 for i in xrange(excess):2729 image = images.pop()2730 image_id = image['id']2731 LOG.debug("Deleting image %s", image_id,2732 instance=instance)2733 self.image_api.delete(context, image_id)2734 @object_compat2735 @wrap_exception()2736 @reverts_task_state2737 @wrap_instance_event2738 @wrap_instance_fault2739 def set_admin_password(self, context, instance, new_pass):2740 """Set the root/admin password for an instance on this host.2741 This is generally only called by API password resets after an2742 image has been built.2743 @param context: Nova auth context.2744 @param instance: Nova instance object.2745 @param new_pass: The admin password for the instance.2746 """2747 context = context.elevated()2748 if new_pass is None:2749 # Generate a random password2750 new_pass = utils.generate_password()2751 current_power_state = self._get_power_state(context, instance)2752 expected_state = power_state.RUNNING2753 if current_power_state != expected_state:2754 instance.task_state = None2755 instance.save(expected_task_state=task_states.UPDATING_PASSWORD)2756 _msg = _('Failed to set admin password. Instance %s is not'2757 ' running') % instance.uuid2758 raise exception.InstancePasswordSetFailed(2759 instance=instance.uuid, reason=_msg)2760 try:2761 self.driver.set_admin_password(instance, new_pass)2762 LOG.audit(_("Root password set"), instance=instance)2763 instance.task_state = None2764 instance.save(2765 expected_task_state=task_states.UPDATING_PASSWORD)2766 except NotImplementedError:2767 _msg = _('set_admin_password is not implemented '2768 'by this driver or guest instance.')2769 LOG.warn(_msg, instance=instance)2770 instance.task_state = None2771 instance.save(2772 expected_task_state=task_states.UPDATING_PASSWORD)2773 raise NotImplementedError(_msg)2774 except exception.UnexpectedTaskStateError:2775 # interrupted by another (most likely delete) task2776 # do not retry2777 raise2778 except Exception as e:2779 # Catch all here because this could be anything.2780 LOG.exception(_LE('set_admin_password failed: %s'), e,2781 instance=instance)2782 self._set_instance_obj_error_state(context, instance)2783 # We create a new exception here so that we won't2784 # potentially reveal password information to the2785 # API caller. The real exception is logged above2786 _msg = _('error setting admin password')2787 raise exception.InstancePasswordSetFailed(2788 instance=instance.uuid, reason=_msg)2789 @wrap_exception()2790 @reverts_task_state2791 @wrap_instance_fault2792 def inject_file(self, context, path, file_contents, instance):2793 """Write a file to the specified path in an instance on this host."""2794 # NOTE(russellb) Remove this method, as well as the underlying virt2795 # driver methods, when the compute rpc interface is bumped to 4.x2796 # as it is no longer used.2797 context = context.elevated()2798 current_power_state = self._get_power_state(context, instance)2799 expected_state = power_state.RUNNING2800 if current_power_state != expected_state:2801 LOG.warn(_('trying to inject a file into a non-running (state: '2802 '%(current_state)s expected: %(expected_state)s)'),2803 {'current_state': current_power_state,2804 'expected_state': expected_state},2805 instance=instance)2806 LOG.audit(_('injecting file to %s'), path,2807 instance=instance)2808 self.driver.inject_file(instance, path, file_contents)2809 def _get_rescue_image(self, context, instance, rescue_image_ref=None):2810 """Determine what image should be used to boot the rescue VM."""2811 # 1. If rescue_image_ref is passed in, use that for rescue.2812 # 2. Else, use the base image associated with instance's current image.2813 # The idea here is to provide the customer with a rescue2814 # environment which they are familiar with.2815 # So, if they built their instance off of a Debian image,2816 # their rescue VM will also be Debian.2817 # 3. As a last resort, use instance's current image.2818 if not rescue_image_ref:2819 system_meta = utils.instance_sys_meta(instance)2820 rescue_image_ref = system_meta.get('image_base_image_ref')2821 if not rescue_image_ref:2822 LOG.warn(_('Unable to find a different image to use for rescue VM,'2823 ' using instance\'s current image'), instance=instance)2824 rescue_image_ref = instance.image_ref2825 image_meta = compute_utils.get_image_metadata(context, self.image_api,2826 rescue_image_ref,2827 instance)2828 # NOTE(belliott) bug #1227350 - xenapi needs the actual image id2829 image_meta['id'] = rescue_image_ref2830 return image_meta2831 @object_compat2832 @wrap_exception()2833 @reverts_task_state2834 @wrap_instance_event2835 @wrap_instance_fault2836 def rescue_instance(self, context, instance, rescue_password,2837 rescue_image_ref=None, clean_shutdown=True):2838 context = context.elevated()2839 LOG.audit(_('Rescuing'), context=context, instance=instance)2840 admin_password = (rescue_password if rescue_password else2841 utils.generate_password())2842 network_info = self._get_instance_nw_info(context, instance)2843 rescue_image_meta = self._get_rescue_image(context, instance,2844 rescue_image_ref)2845 extra_usage_info = {'rescue_image_name':2846 rescue_image_meta.get('name', '')}2847 self._notify_about_instance_usage(context, instance,2848 "rescue.start", extra_usage_info=extra_usage_info,2849 network_info=network_info)2850 try:2851 self._power_off_instance(context, instance, clean_shutdown)2852 self.driver.rescue(context, instance,2853 network_info,2854 rescue_image_meta, admin_password)2855 except Exception as e:2856 LOG.exception(_LE("Error trying to Rescue Instance"),2857 instance=instance)2858 raise exception.InstanceNotRescuable(2859 instance_id=instance.uuid,2860 reason=_("Driver Error: %s") % unicode(e))2861 self.conductor_api.notify_usage_exists(context, instance,2862 current_period=True)2863 current_power_state = self._get_power_state(context, instance)2864 instance.vm_state = vm_states.RESCUED2865 instance.task_state = None2866 instance.power_state = current_power_state2867 instance.launched_at = timeutils.utcnow()2868 instance.save(expected_task_state=task_states.RESCUING)2869 self._notify_about_instance_usage(context, instance,2870 "rescue.end", extra_usage_info=extra_usage_info,2871 network_info=network_info)2872 @object_compat2873 @wrap_exception()2874 @reverts_task_state2875 @wrap_instance_event2876 @wrap_instance_fault2877 def unrescue_instance(self, context, instance):2878 context = context.elevated()2879 LOG.audit(_('Unrescuing'), context=context, instance=instance)2880 network_info = self._get_instance_nw_info(context, instance)2881 self._notify_about_instance_usage(context, instance,2882 "unrescue.start", network_info=network_info)2883 with self._error_out_instance_on_exception(context, instance):2884 self.driver.unrescue(instance,2885 network_info)2886 current_power_state = self._get_power_state(context, instance)2887 instance.vm_state = vm_states.ACTIVE2888 instance.task_state = None2889 instance.power_state = current_power_state2890 instance.save(expected_task_state=task_states.UNRESCUING)2891 self._notify_about_instance_usage(context,2892 instance,2893 "unrescue.end",2894 network_info=network_info)2895 @object_compat2896 @wrap_exception()2897 @wrap_instance_fault2898 def change_instance_metadata(self, context, diff, instance):2899 """Update the metadata published to the instance."""2900 LOG.debug("Changing instance metadata according to %r",2901 diff, instance=instance)2902 self.driver.change_instance_metadata(context, instance, diff)2903 def _cleanup_stored_instance_types(self, migration, instance,2904 restore_old=False):2905 """Clean up "old" and "new" instance_type information stored in2906 instance's system_metadata. Optionally update the "current"2907 instance_type to the saved old one first.2908 Returns the updated system_metadata as a dict, the2909 post-cleanup current instance type and the to-be dropped2910 instance type.2911 """2912 sys_meta = instance.system_metadata2913 if restore_old:2914 instance_type = flavors.extract_flavor(instance, 'old_')2915 drop_instance_type = flavors.extract_flavor(instance)2916 sys_meta = flavors.save_flavor_info(sys_meta, instance_type)2917 else:2918 instance_type = flavors.extract_flavor(instance)2919 drop_instance_type = flavors.extract_flavor(instance, 'old_')2920 flavors.delete_flavor_info(sys_meta, 'old_')2921 flavors.delete_flavor_info(sys_meta, 'new_')2922 return sys_meta, instance_type, drop_instance_type2923 @wrap_exception()2924 @wrap_instance_event2925 @wrap_instance_fault2926 def confirm_resize(self, context, instance, reservations, migration):2927 quotas = objects.Quotas.from_reservations(context,2928 reservations,2929 instance=instance)2930 @utils.synchronized(instance['uuid'])2931 def do_confirm_resize(context, instance, migration_id):2932 # NOTE(wangpan): Get the migration status from db, if it has been2933 # confirmed, we do nothing and return here2934 LOG.debug("Going to confirm migration %s", migration_id,2935 context=context, instance=instance)2936 try:2937 # TODO(russellb) Why are we sending the migration object just2938 # to turn around and look it up from the db again?2939 migration = objects.Migration.get_by_id(2940 context.elevated(), migration_id)2941 except exception.MigrationNotFound:2942 LOG.error(_("Migration %s is not found during confirmation") %2943 migration_id, context=context, instance=instance)2944 quotas.rollback()2945 return2946 if migration.status == 'confirmed':2947 LOG.info(_("Migration %s is already confirmed") %2948 migration_id, context=context, instance=instance)2949 quotas.rollback()2950 return2951 elif migration.status not in ('finished', 'confirming'):2952 LOG.warn(_("Unexpected confirmation status '%(status)s' of "2953 "migration %(id)s, exit confirmation process") %2954 {"status": migration.status, "id": migration_id},2955 context=context, instance=instance)2956 quotas.rollback()2957 return2958 # NOTE(wangpan): Get the instance from db, if it has been2959 # deleted, we do nothing and return here2960 expected_attrs = ['metadata', 'system_metadata']2961 try:2962 instance = objects.Instance.get_by_uuid(2963 context, instance.uuid,2964 expected_attrs=expected_attrs)2965 except exception.InstanceNotFound:2966 LOG.info(_("Instance is not found during confirmation"),2967 context=context, instance=instance)2968 quotas.rollback()2969 return2970 self._confirm_resize(context, instance, quotas,2971 migration=migration)2972 do_confirm_resize(context, instance, migration.id)2973 def _confirm_resize(self, context, instance, quotas,2974 migration=None):2975 """Destroys the source instance."""2976 self._notify_about_instance_usage(context, instance,2977 "resize.confirm.start")2978 with self._error_out_instance_on_exception(context, instance,2979 quotas=quotas):2980 # NOTE(danms): delete stashed migration information2981 sys_meta, instance_type, old_instance_type = (2982 self._cleanup_stored_instance_types(migration, instance))2983 sys_meta.pop('old_vm_state', None)2984 instance.system_metadata = sys_meta2985 instance.save()2986 # NOTE(tr3buchet): tear down networks on source host2987 self.network_api.setup_networks_on_host(context, instance,2988 migration.source_compute, teardown=True)2989 network_info = self._get_instance_nw_info(context, instance)2990 self.driver.confirm_migration(migration, instance,2991 network_info)2992 migration.status = 'confirmed'2993 migration.save(context.elevated())2994 rt = self._get_resource_tracker(migration.source_node)2995 rt.drop_resize_claim(context, instance, old_instance_type)2996 # NOTE(mriedem): The old_vm_state could be STOPPED but the user2997 # might have manually powered up the instance to confirm the2998 # resize/migrate, so we need to check the current power state2999 # on the instance and set the vm_state appropriately. We default3000 # to ACTIVE because if the power state is not SHUTDOWN, we3001 # assume _sync_instance_power_state will clean it up.3002 p_state = instance.power_state3003 vm_state = None3004 if p_state == power_state.SHUTDOWN:3005 vm_state = vm_states.STOPPED3006 LOG.debug("Resized/migrated instance is powered off. "3007 "Setting vm_state to '%s'.", vm_state,3008 instance=instance)3009 else:3010 vm_state = vm_states.ACTIVE3011 instance.vm_state = vm_state3012 instance.task_state = None3013 instance.save(expected_task_state=[None, task_states.DELETING])3014 self._notify_about_instance_usage(3015 context, instance, "resize.confirm.end",3016 network_info=network_info)3017 quotas.commit()3018 @wrap_exception()3019 @reverts_task_state3020 @wrap_instance_event3021 @wrap_instance_fault3022 def revert_resize(self, context, instance, migration, reservations):3023 """Destroys the new instance on the destination machine.3024 Reverts the model changes, and powers on the old instance on the3025 source machine.3026 """3027 quotas = quotas_obj.Quotas.from_reservations(context,3028 reservations,3029 instance=instance)3030 # NOTE(comstud): A revert_resize is essentially a resize back to3031 # the old size, so we need to send a usage event here.3032 self.conductor_api.notify_usage_exists(3033 context, instance, current_period=True)3034 with self._error_out_instance_on_exception(context, instance,3035 quotas=quotas):3036 # NOTE(tr3buchet): tear down networks on destination host3037 self.network_api.setup_networks_on_host(context, instance,3038 teardown=True)3039 instance_p = obj_base.obj_to_primitive(instance)3040 migration_p = obj_base.obj_to_primitive(migration)3041 self.network_api.migrate_instance_start(context,3042 instance_p,3043 migration_p)3044 network_info = self._get_instance_nw_info(context, instance)3045 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(3046 context, instance.uuid)3047 block_device_info = self._get_instance_block_device_info(3048 context, instance, bdms=bdms)3049 self.driver.destroy(context, instance, network_info,3050 block_device_info)3051 self._terminate_volume_connections(context, instance, bdms)3052 migration.status = 'reverted'3053 migration.save(context.elevated())3054 rt = self._get_resource_tracker(instance.node)3055 rt.drop_resize_claim(context, instance)3056 self.compute_rpcapi.finish_revert_resize(context, instance,3057 migration, migration.source_compute,3058 quotas.reservations)3059 @wrap_exception()3060 @reverts_task_state3061 @wrap_instance_event3062 @wrap_instance_fault3063 def finish_revert_resize(self, context, instance, reservations, migration):3064 """Finishes the second half of reverting a resize.3065 Bring the original source instance state back (active/shutoff) and3066 revert the resized attributes in the database.3067 """3068 quotas = quotas_obj.Quotas.from_reservations(context,3069 reservations,3070 instance=instance)3071 with self._error_out_instance_on_exception(context, instance,3072 quotas=quotas):3073 network_info = self._get_instance_nw_info(context, instance)3074 self._notify_about_instance_usage(3075 context, instance, "resize.revert.start")3076 sys_meta, instance_type, drop_instance_type = (3077 self._cleanup_stored_instance_types(migration, instance, True))3078 # NOTE(mriedem): delete stashed old_vm_state information; we3079 # default to ACTIVE for backwards compatibility if old_vm_state3080 # is not set3081 old_vm_state = sys_meta.pop('old_vm_state', vm_states.ACTIVE)3082 instance.system_metadata = sys_meta3083 instance.memory_mb = instance_type['memory_mb']3084 instance.vcpus = instance_type['vcpus']3085 instance.root_gb = instance_type['root_gb']3086 instance.ephemeral_gb = instance_type['ephemeral_gb']3087 instance.instance_type_id = instance_type['id']3088 instance.host = migration['source_compute']3089 instance.node = migration['source_node']3090 instance.save()3091 self.network_api.setup_networks_on_host(context, instance,3092 migration['source_compute'])3093 block_device_info = self._get_instance_block_device_info(3094 context, instance, refresh_conn_info=True)3095 power_on = old_vm_state != vm_states.STOPPED3096 self.driver.finish_revert_migration(context, instance,3097 network_info,3098 block_device_info, power_on)3099 instance.launched_at = timeutils.utcnow()3100 instance.save(expected_task_state=task_states.RESIZE_REVERTING)3101 instance_p = obj_base.obj_to_primitive(instance)3102 migration_p = obj_base.obj_to_primitive(migration)3103 self.network_api.migrate_instance_finish(context,3104 instance_p,3105 migration_p)3106 # if the original vm state was STOPPED, set it back to STOPPED3107 LOG.info(_("Updating instance to original state: '%s'") %3108 old_vm_state)3109 if power_on:3110 instance.vm_state = vm_states.ACTIVE3111 instance.task_state = None3112 instance.save()3113 else:3114 instance.task_state = task_states.POWERING_OFF3115 instance.save()3116 self.stop_instance(context, instance=instance)3117 self._notify_about_instance_usage(3118 context, instance, "resize.revert.end")3119 quotas.commit()3120 def _prep_resize(self, context, image, instance, instance_type,3121 quotas, request_spec, filter_properties, node):3122 if not filter_properties:3123 filter_properties = {}3124 if not instance['host']:3125 self._set_instance_error_state(context, instance)3126 msg = _('Instance has no source host')3127 raise exception.MigrationError(msg)3128 same_host = instance['host'] == self.host3129 if same_host and not CONF.allow_resize_to_same_host:3130 self._set_instance_error_state(context, instance)3131 msg = _('destination same as source!')3132 raise exception.MigrationError(msg)3133 # NOTE(danms): Stash the new instance_type to avoid having to3134 # look it up in the database later3135 sys_meta = instance.system_metadata3136 flavors.save_flavor_info(sys_meta, instance_type, prefix='new_')3137 # NOTE(mriedem): Stash the old vm_state so we can set the3138 # resized/reverted instance back to the same state later.3139 vm_state = instance['vm_state']3140 LOG.debug('Stashing vm_state: %s', vm_state, instance=instance)3141 sys_meta['old_vm_state'] = vm_state3142 instance.save()3143 limits = filter_properties.get('limits', {})3144 rt = self._get_resource_tracker(node)3145 with rt.resize_claim(context, instance, instance_type,3146 image_meta=image, limits=limits) as claim:3147 LOG.audit(_('Migrating'), context=context, instance=instance)3148 self.compute_rpcapi.resize_instance(3149 context, instance, claim.migration, image,3150 instance_type, quotas.reservations)3151 @wrap_exception()3152 @reverts_task_state3153 @wrap_instance_event3154 @wrap_instance_fault3155 def prep_resize(self, context, image, instance, instance_type,3156 reservations, request_spec, filter_properties, node):3157 """Initiates the process of moving a running instance to another host.3158 Possibly changes the RAM and disk size in the process.3159 """3160 if node is None:3161 node = self.driver.get_available_nodes(refresh=True)[0]3162 LOG.debug("No node specified, defaulting to %s", node,3163 instance=instance)3164 quotas = quotas_obj.Quotas.from_reservations(context,3165 reservations,3166 instance=instance)3167 with self._error_out_instance_on_exception(context, instance,3168 quotas=quotas):3169 self.conductor_api.notify_usage_exists(3170 context, instance, current_period=True)3171 self._notify_about_instance_usage(3172 context, instance, "resize.prep.start")3173 try:3174 self._prep_resize(context, image, instance,3175 instance_type, quotas,3176 request_spec, filter_properties,3177 node)3178 # NOTE(dgenin): This is thrown in LibvirtDriver when the3179 # instance to be migrated is backed by LVM.3180 # Remove when LVM migration is implemented.3181 except exception.MigrationPreCheckError:3182 raise3183 except Exception:3184 # try to re-schedule the resize elsewhere:3185 exc_info = sys.exc_info()3186 self._reschedule_resize_or_reraise(context, image, instance,3187 exc_info, instance_type, quotas, request_spec,3188 filter_properties)3189 finally:3190 extra_usage_info = dict(3191 new_instance_type=instance_type['name'],3192 new_instance_type_id=instance_type['id'])3193 self._notify_about_instance_usage(3194 context, instance, "resize.prep.end",3195 extra_usage_info=extra_usage_info)3196 def _reschedule_resize_or_reraise(self, context, image, instance, exc_info,3197 instance_type, quotas, request_spec, filter_properties):3198 """Try to re-schedule the resize or re-raise the original error to3199 error out the instance.3200 """3201 if not request_spec:3202 request_spec = {}3203 if not filter_properties:3204 filter_properties = {}3205 rescheduled = False3206 instance_uuid = instance['uuid']3207 try:3208 reschedule_method = self.compute_task_api.resize_instance3209 scheduler_hint = dict(filter_properties=filter_properties)3210 method_args = (instance, None, scheduler_hint, instance_type,3211 quotas.reservations)3212 task_state = task_states.RESIZE_PREP3213 rescheduled = self._reschedule(context, request_spec,3214 filter_properties, instance, reschedule_method,3215 method_args, task_state, exc_info)3216 except Exception as error:3217 rescheduled = False3218 LOG.exception(_LE("Error trying to reschedule"),3219 instance_uuid=instance_uuid)3220 compute_utils.add_instance_fault_from_exc(context,3221 instance, error,3222 exc_info=sys.exc_info())3223 self._notify_about_instance_usage(context, instance,3224 'resize.error', fault=error)3225 if rescheduled:3226 self._log_original_error(exc_info, instance_uuid)3227 compute_utils.add_instance_fault_from_exc(context,3228 instance, exc_info[1], exc_info=exc_info)3229 self._notify_about_instance_usage(context, instance,3230 'resize.error', fault=exc_info[1])3231 else:3232 # not re-scheduling3233 raise exc_info[0], exc_info[1], exc_info[2]3234 @wrap_exception()3235 @reverts_task_state3236 @wrap_instance_event3237 @errors_out_migration3238 @wrap_instance_fault3239 def resize_instance(self, context, instance, image,3240 reservations, migration, instance_type,3241 clean_shutdown=True):3242 """Starts the migration of a running instance to another host."""3243 quotas = quotas_obj.Quotas.from_reservations(context,3244 reservations,3245 instance=instance)3246 with self._error_out_instance_on_exception(context, instance,3247 quotas=quotas):3248 if not instance_type:3249 instance_type = objects.Flavor.get_by_id(3250 context, migration['new_instance_type_id'])3251 network_info = self._get_instance_nw_info(context, instance)3252 migration.status = 'migrating'3253 migration.save(context.elevated())3254 instance.task_state = task_states.RESIZE_MIGRATING3255 instance.save(expected_task_state=task_states.RESIZE_PREP)3256 self._notify_about_instance_usage(3257 context, instance, "resize.start", network_info=network_info)3258 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(3259 context, instance.uuid)3260 block_device_info = self._get_instance_block_device_info(3261 context, instance, bdms=bdms)3262 timeout, retry_interval = self._get_power_off_values(context,3263 instance, clean_shutdown)3264 disk_info = self.driver.migrate_disk_and_power_off(3265 context, instance, migration.dest_host,3266 instance_type, network_info,3267 block_device_info,3268 timeout, retry_interval)3269 self._terminate_volume_connections(context, instance, bdms)3270 migration_p = obj_base.obj_to_primitive(migration)3271 instance_p = obj_base.obj_to_primitive(instance)3272 self.network_api.migrate_instance_start(context,3273 instance_p,3274 migration_p)3275 migration.status = 'post-migrating'3276 migration.save(context.elevated())3277 instance.host = migration.dest_compute3278 instance.node = migration.dest_node3279 instance.task_state = task_states.RESIZE_MIGRATED3280 instance.save(expected_task_state=task_states.RESIZE_MIGRATING)3281 self.compute_rpcapi.finish_resize(context, instance,3282 migration, image, disk_info,3283 migration.dest_compute, reservations=quotas.reservations)3284 self._notify_about_instance_usage(context, instance, "resize.end",3285 network_info=network_info)3286 self.instance_events.clear_events_for_instance(instance)3287 def _terminate_volume_connections(self, context, instance, bdms):3288 connector = self.driver.get_volume_connector(instance)3289 for bdm in bdms:3290 if bdm.is_volume:3291 self.volume_api.terminate_connection(context, bdm.volume_id,3292 connector)3293 @staticmethod3294 def _save_instance_info(instance, instance_type, sys_meta):3295 flavors.save_flavor_info(sys_meta, instance_type)3296 instance.instance_type_id = instance_type['id']3297 instance.memory_mb = instance_type['memory_mb']3298 instance.vcpus = instance_type['vcpus']3299 instance.root_gb = instance_type['root_gb']3300 instance.ephemeral_gb = instance_type['ephemeral_gb']3301 instance.system_metadata = sys_meta3302 instance.save()3303 def _finish_resize(self, context, instance, migration, disk_info,3304 image):3305 resize_instance = False3306 old_instance_type_id = migration['old_instance_type_id']3307 new_instance_type_id = migration['new_instance_type_id']3308 old_instance_type = flavors.extract_flavor(instance)3309 sys_meta = instance.system_metadata3310 # NOTE(mriedem): Get the old_vm_state so we know if we should3311 # power on the instance. If old_vm_state is not set we need to default3312 # to ACTIVE for backwards compatibility3313 old_vm_state = sys_meta.get('old_vm_state', vm_states.ACTIVE)3314 flavors.save_flavor_info(sys_meta,3315 old_instance_type,3316 prefix='old_')3317 if old_instance_type_id != new_instance_type_id:3318 instance_type = flavors.extract_flavor(instance, prefix='new_')3319 self._save_instance_info(instance, instance_type, sys_meta)3320 resize_instance = True3321 # NOTE(tr3buchet): setup networks on destination host3322 self.network_api.setup_networks_on_host(context, instance,3323 migration['dest_compute'])3324 instance_p = obj_base.obj_to_primitive(instance)3325 migration_p = obj_base.obj_to_primitive(migration)3326 self.network_api.migrate_instance_finish(context,3327 instance_p,3328 migration_p)3329 network_info = self._get_instance_nw_info(context, instance)3330 instance.task_state = task_states.RESIZE_FINISH3331 instance.system_metadata = sys_meta3332 instance.save(expected_task_state=task_states.RESIZE_MIGRATED)3333 self._notify_about_instance_usage(3334 context, instance, "finish_resize.start",3335 network_info=network_info)3336 block_device_info = self._get_instance_block_device_info(3337 context, instance, refresh_conn_info=True)3338 # NOTE(mriedem): If the original vm_state was STOPPED, we don't3339 # automatically power on the instance after it's migrated3340 power_on = old_vm_state != vm_states.STOPPED3341 try:3342 self.driver.finish_migration(context, migration, instance,3343 disk_info,3344 network_info,3345 image, resize_instance,3346 block_device_info, power_on)3347 except Exception:3348 with excutils.save_and_reraise_exception():3349 if resize_instance:3350 self._save_instance_info(instance,3351 old_instance_type, sys_meta)3352 migration.status = 'finished'3353 migration.save(context.elevated())3354 instance.vm_state = vm_states.RESIZED3355 instance.task_state = None3356 instance.launched_at = timeutils.utcnow()3357 instance.save(expected_task_state=task_states.RESIZE_FINISH)3358 self._notify_about_instance_usage(3359 context, instance, "finish_resize.end",3360 network_info=network_info)3361 @wrap_exception()3362 @reverts_task_state3363 @wrap_instance_event3364 @errors_out_migration3365 @wrap_instance_fault3366 def finish_resize(self, context, disk_info, image, instance,3367 reservations, migration):3368 """Completes the migration process.3369 Sets up the newly transferred disk and turns on the instance at its3370 new host machine.3371 """3372 quotas = quotas_obj.Quotas.from_reservations(context,3373 reservations,3374 instance=instance)3375 try:3376 self._finish_resize(context, instance, migration,3377 disk_info, image)3378 quotas.commit()3379 except Exception:3380 LOG.exception(_LE('Setting instance vm_state to ERROR'),3381 instance=instance)3382 with excutils.save_and_reraise_exception():3383 try:3384 quotas.rollback()3385 except Exception as qr_error:3386 LOG.exception(_LE("Failed to rollback quota for failed "3387 "finish_resize: %s"),3388 qr_error, instance=instance)3389 self._set_instance_error_state(context, instance)3390 @object_compat3391 @wrap_exception()3392 @wrap_instance_fault3393 def add_fixed_ip_to_instance(self, context, network_id, instance):3394 """Calls network_api to add new fixed_ip to instance3395 then injects the new network info and resets instance networking.3396 """3397 self._notify_about_instance_usage(3398 context, instance, "create_ip.start")3399 network_info = self.network_api.add_fixed_ip_to_instance(context,3400 instance,3401 network_id)3402 self._inject_network_info(context, instance, network_info)3403 self.reset_network(context, instance)3404 # NOTE(russellb) We just want to bump updated_at. See bug 1143466.3405 instance.updated_at = timeutils.utcnow()3406 instance.save()3407 self._notify_about_instance_usage(3408 context, instance, "create_ip.end", network_info=network_info)3409 @object_compat3410 @wrap_exception()3411 @wrap_instance_fault3412 def remove_fixed_ip_from_instance(self, context, address, instance):3413 """Calls network_api to remove existing fixed_ip from instance3414 by injecting the altered network info and resetting3415 instance networking.3416 """3417 self._notify_about_instance_usage(3418 context, instance, "delete_ip.start")3419 network_info = self.network_api.remove_fixed_ip_from_instance(context,3420 instance,3421 address)3422 self._inject_network_info(context, instance, network_info)3423 self.reset_network(context, instance)3424 # NOTE(russellb) We just want to bump updated_at. See bug 1143466.3425 instance.updated_at = timeutils.utcnow()3426 instance.save()3427 self._notify_about_instance_usage(3428 context, instance, "delete_ip.end", network_info=network_info)3429 @wrap_exception()3430 @reverts_task_state3431 @wrap_instance_event3432 @wrap_instance_fault3433 def pause_instance(self, context, instance):3434 """Pause an instance on this host."""3435 context = context.elevated()3436 LOG.audit(_('Pausing'), context=context, instance=instance)3437 self._notify_about_instance_usage(context, instance, 'pause.start')3438 self.driver.pause(instance)3439 current_power_state = self._get_power_state(context, instance)3440 instance.power_state = current_power_state3441 instance.vm_state = vm_states.PAUSED3442 instance.task_state = None3443 instance.save(expected_task_state=task_states.PAUSING)3444 self._notify_about_instance_usage(context, instance, 'pause.end')3445 @wrap_exception()3446 @reverts_task_state3447 @wrap_instance_event3448 @wrap_instance_fault3449 def unpause_instance(self, context, instance):3450 """Unpause a paused instance on this host."""3451 context = context.elevated()3452 LOG.audit(_('Unpausing'), context=context, instance=instance)3453 self._notify_about_instance_usage(context, instance, 'unpause.start')3454 self.driver.unpause(instance)3455 current_power_state = self._get_power_state(context, instance)3456 instance.power_state = current_power_state3457 instance.vm_state = vm_states.ACTIVE3458 instance.task_state = None3459 instance.save(expected_task_state=task_states.UNPAUSING)3460 self._notify_about_instance_usage(context, instance, 'unpause.end')3461 @wrap_exception()3462 def host_power_action(self, context, action):3463 """Reboots, shuts down or powers up the host."""3464 # TODO(russellb) Remove the unused host parameter from the driver API3465 return self.driver.host_power_action(None, action)3466 @wrap_exception()3467 def host_maintenance_mode(self, context, host, mode):3468 """Start/Stop host maintenance window. On start, it triggers3469 guest VMs evacuation.3470 """3471 return self.driver.host_maintenance_mode(host, mode)3472 @wrap_exception()3473 def set_host_enabled(self, context, enabled):3474 """Sets the specified host's ability to accept new instances."""3475 # TODO(russellb) Remove the unused host parameter from the driver API3476 return self.driver.set_host_enabled(None, enabled)3477 @wrap_exception()3478 def get_host_uptime(self, context):3479 """Returns the result of calling "uptime" on the target host."""3480 return self.driver.get_host_uptime(self.host)3481 @object_compat3482 @wrap_exception()3483 @wrap_instance_fault3484 def get_diagnostics(self, context, instance):3485 """Retrieve diagnostics for an instance on this host."""3486 current_power_state = self._get_power_state(context, instance)3487 if current_power_state == power_state.RUNNING:3488 LOG.audit(_("Retrieving diagnostics"), context=context,3489 instance=instance)3490 return self.driver.get_diagnostics(instance)3491 else:3492 raise exception.InstanceInvalidState(3493 attr='power_state',3494 instance_uuid=instance.uuid,3495 state=instance.power_state,3496 method='get_diagnostics')3497 @object_compat3498 @wrap_exception()3499 @wrap_instance_fault3500 def get_instance_diagnostics(self, context, instance):3501 """Retrieve diagnostics for an instance on this host."""3502 current_power_state = self._get_power_state(context, instance)3503 if current_power_state == power_state.RUNNING:3504 LOG.audit(_("Retrieving diagnostics"), context=context,3505 instance=instance)3506 diags = self.driver.get_instance_diagnostics(instance)3507 return diags.serialize()3508 else:3509 raise exception.InstanceInvalidState(3510 attr='power_state',3511 instance_uuid=instance.uuid,3512 state=instance.power_state,3513 method='get_diagnostics')3514 @wrap_exception()3515 @reverts_task_state3516 @wrap_instance_event3517 @wrap_instance_fault3518 def suspend_instance(self, context, instance):3519 """Suspend the given instance."""3520 context = context.elevated()3521 # Store the old state3522 instance.system_metadata['old_vm_state'] = instance.vm_state3523 with self._error_out_instance_on_exception(context, instance,3524 instance_state=instance['vm_state']):3525 self.driver.suspend(instance)3526 current_power_state = self._get_power_state(context, instance)3527 instance.power_state = current_power_state3528 instance.vm_state = vm_states.SUSPENDED3529 instance.task_state = None3530 instance.save(expected_task_state=task_states.SUSPENDING)3531 self._notify_about_instance_usage(context, instance, 'suspend')3532 @wrap_exception()3533 @reverts_task_state3534 @wrap_instance_event3535 @wrap_instance_fault3536 def resume_instance(self, context, instance):3537 """Resume the given suspended instance."""3538 context = context.elevated()3539 LOG.audit(_('Resuming'), context=context, instance=instance)3540 network_info = self._get_instance_nw_info(context, instance)3541 block_device_info = self._get_instance_block_device_info(3542 context, instance)3543 self.driver.resume(context, instance, network_info,3544 block_device_info)3545 instance.power_state = self._get_power_state(context, instance)3546 # We default to the ACTIVE state for backwards compatibility3547 instance.vm_state = instance.system_metadata.pop('old_vm_state',3548 vm_states.ACTIVE)3549 instance.task_state = None3550 instance.save(expected_task_state=task_states.RESUMING)3551 self._notify_about_instance_usage(context, instance, 'resume')3552 @wrap_exception()3553 @reverts_task_state3554 @wrap_instance_event3555 @wrap_instance_fault3556 def shelve_instance(self, context, instance, image_id,3557 clean_shutdown=True):3558 """Shelve an instance.3559 This should be used when you want to take a snapshot of the instance.3560 It also adds system_metadata that can be used by a periodic task to3561 offload the shelved instance after a period of time.3562 :param context: request context3563 :param instance: an Instance object3564 :param image_id: an image id to snapshot to.3565 """3566 self.conductor_api.notify_usage_exists(3567 context, obj_base.obj_to_primitive(instance),3568 current_period=True)3569 self._notify_about_instance_usage(context, instance, 'shelve.start')3570 def update_task_state(task_state, expected_state=task_states.SHELVING):3571 shelving_state_map = {3572 task_states.IMAGE_PENDING_UPLOAD:3573 task_states.SHELVING_IMAGE_PENDING_UPLOAD,3574 task_states.IMAGE_UPLOADING:3575 task_states.SHELVING_IMAGE_UPLOADING,3576 task_states.SHELVING: task_states.SHELVING}3577 task_state = shelving_state_map[task_state]3578 expected_state = shelving_state_map[expected_state]3579 instance.task_state = task_state3580 instance.save(expected_task_state=expected_state)3581 self._power_off_instance(context, instance, clean_shutdown)3582 current_power_state = self._get_power_state(context, instance)3583 self.driver.snapshot(context, instance, image_id, update_task_state)3584 instance.system_metadata['shelved_at'] = timeutils.strtime()3585 instance.system_metadata['shelved_image_id'] = image_id3586 instance.system_metadata['shelved_host'] = self.host3587 instance.vm_state = vm_states.SHELVED3588 instance.task_state = None3589 if CONF.shelved_offload_time == 0:3590 instance.task_state = task_states.SHELVING_OFFLOADING3591 instance.power_state = current_power_state3592 instance.save(expected_task_state=[3593 task_states.SHELVING,3594 task_states.SHELVING_IMAGE_UPLOADING])3595 self._notify_about_instance_usage(context, instance, 'shelve.end')3596 if CONF.shelved_offload_time == 0:3597 self.shelve_offload_instance(context, instance)3598 @wrap_exception()3599 @reverts_task_state3600 @wrap_instance_fault3601 def shelve_offload_instance(self, context, instance):3602 """Remove a shelved instance from the hypervisor.3603 This frees up those resources for use by other instances, but may lead3604 to slower unshelve times for this instance. This method is used by3605 volume backed instances since restoring them doesn't involve the3606 potentially large download of an image.3607 :param context: request context3608 :param instance: nova.objects.instance.Instance3609 """3610 self._notify_about_instance_usage(context, instance,3611 'shelve_offload.start')3612 self.driver.power_off(instance)3613 current_power_state = self._get_power_state(context, instance)3614 network_info = self._get_instance_nw_info(context, instance)3615 block_device_info = self._get_instance_block_device_info(context,3616 instance)3617 self.driver.destroy(context, instance, network_info,3618 block_device_info)3619 instance.power_state = current_power_state3620 instance.host = None3621 instance.node = None3622 instance.vm_state = vm_states.SHELVED_OFFLOADED3623 instance.task_state = None3624 instance.save(expected_task_state=[task_states.SHELVING,3625 task_states.SHELVING_OFFLOADING])3626 self._notify_about_instance_usage(context, instance,3627 'shelve_offload.end')3628 @wrap_exception()3629 @reverts_task_state3630 @wrap_instance_event3631 @wrap_instance_fault3632 def unshelve_instance(self, context, instance, image,3633 filter_properties=None, node=None):3634 """Unshelve the instance.3635 :param context: request context3636 :param instance: a nova.objects.instance.Instance object3637 :param image: an image to build from. If None we assume a3638 volume backed instance.3639 :param filter_properties: dict containing limits, retry info etc.3640 :param node: target compute node3641 """3642 if filter_properties is None:3643 filter_properties = {}3644 @utils.synchronized(instance['uuid'])3645 def do_unshelve_instance():3646 self._unshelve_instance(context, instance, image,3647 filter_properties, node)3648 do_unshelve_instance()3649 def _unshelve_instance_key_scrub(self, instance):3650 """Remove data from the instance that may cause side effects."""3651 cleaned_keys = dict(3652 key_data=instance.key_data,3653 auto_disk_config=instance.auto_disk_config)3654 instance.key_data = None3655 instance.auto_disk_config = False3656 return cleaned_keys3657 def _unshelve_instance_key_restore(self, instance, keys):3658 """Restore previously scrubbed keys before saving the instance."""3659 instance.update(keys)3660 def _unshelve_instance(self, context, instance, image, filter_properties,3661 node):3662 self._notify_about_instance_usage(context, instance, 'unshelve.start')3663 instance.task_state = task_states.SPAWNING3664 instance.save()3665 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(3666 context, instance.uuid)3667 block_device_info = self._prep_block_device(context, instance, bdms,3668 do_check_attach=False)3669 scrubbed_keys = self._unshelve_instance_key_scrub(instance)3670 if node is None:3671 node = self.driver.get_available_nodes()[0]3672 LOG.debug('No node specified, defaulting to %s', node,3673 instance=instance)3674 rt = self._get_resource_tracker(node)3675 limits = filter_properties.get('limits', {})3676 if image:3677 shelved_image_ref = instance.image_ref3678 instance.image_ref = image['id']3679 self.network_api.migrate_instance_finish(context, instance,3680 {'source_compute': '', 'dest_compute': self.host})3681 network_info = self._get_instance_nw_info(context, instance)3682 try:3683 with rt.instance_claim(context, instance, limits):3684 self.driver.spawn(context, instance, image, injected_files=[],3685 admin_password=None,3686 network_info=network_info,3687 block_device_info=block_device_info)3688 except Exception:3689 with excutils.save_and_reraise_exception():3690 LOG.exception(_LE('Instance failed to spawn'),3691 instance=instance)3692 if image:3693 instance.image_ref = shelved_image_ref3694 self.image_api.delete(context, image['id'])3695 self._unshelve_instance_key_restore(instance, scrubbed_keys)3696 instance.power_state = self._get_power_state(context, instance)3697 instance.vm_state = vm_states.ACTIVE3698 instance.task_state = None3699 instance.launched_at = timeutils.utcnow()3700 instance.save(expected_task_state=task_states.SPAWNING)3701 self._notify_about_instance_usage(context, instance, 'unshelve.end')3702 @messaging.expected_exceptions(NotImplementedError)3703 @wrap_instance_fault3704 def reset_network(self, context, instance):3705 """Reset networking on the given instance."""3706 LOG.debug('Reset network', context=context, instance=instance)3707 self.driver.reset_network(instance)3708 def _inject_network_info(self, context, instance, network_info):3709 """Inject network info for the given instance."""3710 LOG.debug('Inject network info', context=context, instance=instance)3711 LOG.debug('network_info to inject: |%s|', network_info,3712 instance=instance)3713 self.driver.inject_network_info(instance,3714 network_info)3715 @wrap_instance_fault3716 def inject_network_info(self, context, instance):3717 """Inject network info, but don't return the info."""3718 network_info = self._get_instance_nw_info(context, instance)3719 self._inject_network_info(context, instance, network_info)3720 @object_compat3721 @messaging.expected_exceptions(NotImplementedError,3722 exception.InstanceNotFound)3723 @wrap_exception()3724 @wrap_instance_fault3725 def get_console_output(self, context, instance, tail_length):3726 """Send the console output for the given instance."""3727 context = context.elevated()3728 LOG.audit(_("Get console output"), context=context,3729 instance=instance)3730 output = self.driver.get_console_output(context, instance)3731 if tail_length is not None:3732 output = self._tail_log(output, tail_length)3733 return output.decode('utf-8', 'replace').encode('ascii', 'replace')3734 def _tail_log(self, log, length):3735 try:3736 length = int(length)3737 except ValueError:3738 length = 03739 if length == 0:3740 return ''3741 else:3742 return '\n'.join(log.split('\n')[-int(length):])3743 @messaging.expected_exceptions(exception.ConsoleTypeInvalid,3744 exception.InstanceNotReady,3745 exception.InstanceNotFound,3746 exception.ConsoleTypeUnavailable,3747 NotImplementedError)3748 @object_compat3749 @wrap_exception()3750 @wrap_instance_fault3751 def get_vnc_console(self, context, console_type, instance):3752 """Return connection information for a vnc console."""3753 context = context.elevated()3754 LOG.debug("Getting vnc console", instance=instance)3755 token = str(uuid.uuid4())3756 if not CONF.vnc_enabled:3757 raise exception.ConsoleTypeUnavailable(console_type=console_type)3758 if console_type == 'novnc':3759 # For essex, novncproxy_base_url must include the full path3760 # including the html file (like http://myhost/vnc_auto.html)3761 access_url = '%s?token=%s' % (CONF.novncproxy_base_url, token)3762 elif console_type == 'xvpvnc':3763 access_url = '%s?token=%s' % (CONF.xvpvncproxy_base_url, token)3764 else:3765 raise exception.ConsoleTypeInvalid(console_type=console_type)3766 try:3767 # Retrieve connect info from driver, and then decorate with our3768 # access info token3769 console = self.driver.get_vnc_console(context, instance)3770 connect_info = console.get_connection_info(token, access_url)3771 except exception.InstanceNotFound:3772 if instance['vm_state'] != vm_states.BUILDING:3773 raise3774 raise exception.InstanceNotReady(instance_id=instance['uuid'])3775 return connect_info3776 @object_compat3777 @messaging.expected_exceptions(exception.ConsoleTypeInvalid,3778 exception.InstanceNotReady,3779 exception.InstanceNotFound,3780 exception.ConsoleTypeUnavailable)3781 @wrap_exception()3782 @wrap_instance_fault3783 def get_spice_console(self, context, console_type, instance):3784 """Return connection information for a spice console."""3785 context = context.elevated()3786 LOG.debug("Getting spice console", instance=instance)3787 token = str(uuid.uuid4())3788 if not CONF.spice.enabled:3789 raise exception.ConsoleTypeUnavailable(console_type=console_type)3790 if console_type == 'spice-html5':3791 # For essex, spicehtml5proxy_base_url must include the full path3792 # including the html file (like http://myhost/spice_auto.html)3793 access_url = '%s?token=%s' % (CONF.spice.html5proxy_base_url,3794 token)3795 else:3796 raise exception.ConsoleTypeInvalid(console_type=console_type)3797 try:3798 # Retrieve connect info from driver, and then decorate with our3799 # access info token3800 console = self.driver.get_spice_console(context, instance)3801 connect_info = console.get_connection_info(token, access_url)3802 except exception.InstanceNotFound:3803 if instance['vm_state'] != vm_states.BUILDING:3804 raise3805 raise exception.InstanceNotReady(instance_id=instance['uuid'])3806 return connect_info3807 @object_compat3808 @messaging.expected_exceptions(exception.ConsoleTypeInvalid,3809 exception.InstanceNotReady,3810 exception.InstanceNotFound,3811 exception.ConsoleTypeUnavailable,3812 NotImplementedError)3813 @wrap_exception()3814 @wrap_instance_fault3815 def get_rdp_console(self, context, console_type, instance):3816 """Return connection information for a RDP console."""3817 context = context.elevated()3818 LOG.debug("Getting RDP console", instance=instance)3819 token = str(uuid.uuid4())3820 if not CONF.rdp.enabled:3821 raise exception.ConsoleTypeUnavailable(console_type=console_type)3822 if console_type == 'rdp-html5':3823 access_url = '%s?token=%s' % (CONF.rdp.html5_proxy_base_url,3824 token)3825 else:3826 raise exception.ConsoleTypeInvalid(console_type=console_type)3827 try:3828 # Retrieve connect info from driver, and then decorate with our3829 # access info token3830 console = self.driver.get_rdp_console(context, instance)3831 connect_info = console.get_connection_info(token, access_url)3832 except exception.InstanceNotFound:3833 if instance['vm_state'] != vm_states.BUILDING:3834 raise3835 raise exception.InstanceNotReady(instance_id=instance['uuid'])3836 return connect_info3837 @messaging.expected_exceptions(3838 exception.ConsoleTypeInvalid,3839 exception.InstanceNotReady,3840 exception.InstanceNotFound,3841 exception.ConsoleTypeUnavailable,3842 exception.SocketPortRangeExhaustedException,3843 exception.ImageSerialPortNumberInvalid,3844 exception.ImageSerialPortNumberExceedFlavorValue,3845 NotImplementedError)3846 @wrap_exception()3847 @wrap_instance_fault3848 def get_serial_console(self, context, console_type, instance):3849 """Returns connection information for a serial console."""3850 LOG.debug("Getting serial console", instance=instance)3851 if not CONF.serial_console.enabled:3852 raise exception.ConsoleTypeUnavailable(console_type=console_type)3853 context = context.elevated()3854 token = str(uuid.uuid4())3855 access_url = '%s?token=%s' % (CONF.serial_console.base_url, token)3856 try:3857 # Retrieve connect info from driver, and then decorate with our3858 # access info token3859 console = self.driver.get_serial_console(context, instance)3860 connect_info = console.get_connection_info(token, access_url)3861 except exception.InstanceNotFound:3862 if instance.vm_state != vm_states.BUILDING:3863 raise3864 raise exception.InstanceNotReady(instance_id=instance['uuid'])3865 return connect_info3866 @messaging.expected_exceptions(exception.ConsoleTypeInvalid,3867 exception.InstanceNotReady,3868 exception.InstanceNotFound)3869 @object_compat3870 @wrap_exception()3871 @wrap_instance_fault3872 def validate_console_port(self, ctxt, instance, port, console_type):3873 if console_type == "spice-html5":3874 console_info = self.driver.get_spice_console(ctxt, instance)3875 elif console_type == "rdp-html5":3876 console_info = self.driver.get_rdp_console(ctxt, instance)3877 elif console_type == "serial":3878 console_info = self.driver.get_serial_console(ctxt, instance)3879 else:3880 console_info = self.driver.get_vnc_console(ctxt, instance)3881 return console_info.port == port3882 @object_compat3883 @wrap_exception()3884 @reverts_task_state3885 @wrap_instance_fault3886 def reserve_block_device_name(self, context, instance, device,3887 volume_id, disk_bus=None, device_type=None):3888 # NOTE(ndipanov): disk_bus and device_type will be set to None if not3889 # passed (by older clients) and defaulted by the virt driver. Remove3890 # default values on the next major RPC version bump.3891 @utils.synchronized(instance['uuid'])3892 def do_reserve():3893 bdms = (3894 objects.BlockDeviceMappingList.get_by_instance_uuid(3895 context, instance.uuid))3896 device_name = compute_utils.get_device_name_for_instance(3897 context, instance, bdms, device)3898 # NOTE(vish): create bdm here to avoid race condition3899 bdm = objects.BlockDeviceMapping(3900 source_type='volume', destination_type='volume',3901 instance_uuid=instance.uuid,3902 volume_id=volume_id or 'reserved',3903 device_name=device_name,3904 disk_bus=disk_bus, device_type=device_type)3905 bdm.create(context)3906 return device_name3907 return do_reserve()3908 @object_compat3909 @wrap_exception()3910 @reverts_task_state3911 @wrap_instance_fault3912 def attach_volume(self, context, volume_id, mountpoint,3913 instance, bdm=None):3914 """Attach a volume to an instance."""3915 if not bdm:3916 bdm = objects.BlockDeviceMapping.get_by_volume_id(3917 context, volume_id)3918 driver_bdm = driver_block_device.DriverVolumeBlockDevice(bdm)3919 @utils.synchronized(instance.uuid)3920 def do_attach_volume(context, instance, driver_bdm):3921 try:3922 return self._attach_volume(context, instance, driver_bdm)3923 except Exception:3924 with excutils.save_and_reraise_exception():3925 bdm.destroy(context)3926 do_attach_volume(context, instance, driver_bdm)3927 def _attach_volume(self, context, instance, bdm):3928 context = context.elevated()3929 LOG.audit(_('Attaching volume %(volume_id)s to %(mountpoint)s'),3930 {'volume_id': bdm.volume_id,3931 'mountpoint': bdm['mount_device']},3932 context=context, instance=instance)3933 try:3934 bdm.attach(context, instance, self.volume_api, self.driver,3935 do_check_attach=False, do_driver_attach=True)3936 except Exception: # pylint: disable=W07023937 with excutils.save_and_reraise_exception():3938 LOG.exception(_LE("Failed to attach %(volume_id)s "3939 "at %(mountpoint)s"),3940 {'volume_id': bdm.volume_id,3941 'mountpoint': bdm['mount_device']},3942 context=context, instance=instance)3943 self.volume_api.unreserve_volume(context, bdm.volume_id)3944 info = {'volume_id': bdm.volume_id}3945 self._notify_about_instance_usage(3946 context, instance, "volume.attach", extra_usage_info=info)3947 def _detach_volume(self, context, instance, bdm):3948 """Do the actual driver detach using block device mapping."""3949 mp = bdm.device_name3950 volume_id = bdm.volume_id3951 LOG.audit(_('Detach volume %(volume_id)s from mountpoint %(mp)s'),3952 {'volume_id': volume_id, 'mp': mp},3953 context=context, instance=instance)3954 connection_info = jsonutils.loads(bdm.connection_info)3955 # NOTE(vish): We currently don't use the serial when disconnecting,3956 # but added for completeness in case we ever do.3957 if connection_info and 'serial' not in connection_info:3958 connection_info['serial'] = volume_id3959 try:3960 if not self.driver.instance_exists(instance):3961 LOG.warn(_('Detaching volume from unknown instance'),3962 context=context, instance=instance)3963 encryption = encryptors.get_encryption_metadata(3964 context, self.volume_api, volume_id, connection_info)3965 self.driver.detach_volume(connection_info,3966 instance,3967 mp,3968 encryption=encryption)3969 except Exception: # pylint: disable=W07023970 with excutils.save_and_reraise_exception():3971 LOG.exception(_LE('Failed to detach volume %(volume_id)s '3972 'from %(mp)s'),3973 {'volume_id': volume_id, 'mp': mp},3974 context=context, instance=instance)3975 self.volume_api.roll_detaching(context, volume_id)3976 @object_compat3977 @wrap_exception()3978 @reverts_task_state3979 @wrap_instance_fault3980 def detach_volume(self, context, volume_id, instance):3981 """Detach a volume from an instance."""3982 bdm = objects.BlockDeviceMapping.get_by_volume_id(3983 context, volume_id)3984 if CONF.volume_usage_poll_interval > 0:3985 vol_stats = []3986 mp = bdm.device_name3987 # Handle bootable volumes which will not contain /dev/3988 if '/dev/' in mp:3989 mp = mp[5:]3990 try:3991 vol_stats = self.driver.block_stats(instance.name, mp)3992 except NotImplementedError:3993 pass3994 if vol_stats:3995 LOG.debug("Updating volume usage cache with totals",3996 instance=instance)3997 rd_req, rd_bytes, wr_req, wr_bytes, flush_ops = vol_stats3998 self.conductor_api.vol_usage_update(context, volume_id,3999 rd_req, rd_bytes,4000 wr_req, wr_bytes,4001 instance,4002 update_totals=True)4003 self._detach_volume(context, instance, bdm)4004 connector = self.driver.get_volume_connector(instance)4005 self.volume_api.terminate_connection(context, volume_id, connector)4006 bdm.destroy()4007 info = dict(volume_id=volume_id)4008 self._notify_about_instance_usage(4009 context, instance, "volume.detach", extra_usage_info=info)4010 self.volume_api.detach(context.elevated(), volume_id)4011 def _init_volume_connection(self, context, new_volume_id,4012 old_volume_id, connector, instance, bdm):4013 new_cinfo = self.volume_api.initialize_connection(context,4014 new_volume_id,4015 connector)4016 old_cinfo = jsonutils.loads(bdm['connection_info'])4017 if old_cinfo and 'serial' not in old_cinfo:4018 old_cinfo['serial'] = old_volume_id4019 new_cinfo['serial'] = old_cinfo['serial']4020 return (old_cinfo, new_cinfo)4021 def _swap_volume(self, context, instance, bdm, connector, old_volume_id,4022 new_volume_id):4023 mountpoint = bdm['device_name']4024 failed = False4025 new_cinfo = None4026 resize_to = 04027 try:4028 old_cinfo, new_cinfo = self._init_volume_connection(context,4029 new_volume_id,4030 old_volume_id,4031 connector,4032 instance,4033 bdm)4034 old_vol_size = self.volume_api.get(context, old_volume_id)['size']4035 new_vol_size = self.volume_api.get(context, new_volume_id)['size']4036 if new_vol_size > old_vol_size:4037 resize_to = new_vol_size4038 self.driver.swap_volume(old_cinfo, new_cinfo, instance, mountpoint,4039 resize_to)4040 except Exception: # pylint: disable=W07024041 failed = True4042 with excutils.save_and_reraise_exception():4043 if new_cinfo:4044 msg = _LE("Failed to swap volume %(old_volume_id)s "4045 "for %(new_volume_id)s")4046 LOG.exception(msg, {'old_volume_id': old_volume_id,4047 'new_volume_id': new_volume_id},4048 context=context,4049 instance=instance)4050 else:4051 msg = _LE("Failed to connect to volume %(volume_id)s "4052 "with volume at %(mountpoint)s")4053 LOG.exception(msg, {'volume_id': new_volume_id,4054 'mountpoint': bdm['device_name']},4055 context=context,4056 instance=instance)4057 self.volume_api.roll_detaching(context, old_volume_id)4058 self.volume_api.unreserve_volume(context, new_volume_id)4059 finally:4060 conn_volume = new_volume_id if failed else old_volume_id4061 if new_cinfo:4062 self.volume_api.terminate_connection(context,4063 conn_volume,4064 connector)4065 # If Cinder initiated the swap, it will keep4066 # the original ID4067 comp_ret = self.volume_api.migrate_volume_completion(4068 context,4069 old_volume_id,4070 new_volume_id,4071 error=failed)4072 return (comp_ret, new_cinfo)4073 @wrap_exception()4074 @reverts_task_state4075 @wrap_instance_fault4076 def swap_volume(self, context, old_volume_id, new_volume_id, instance):4077 """Swap volume for an instance."""4078 context = context.elevated()4079 bdm = objects.BlockDeviceMapping.get_by_volume_id(4080 context, old_volume_id, instance_uuid=instance.uuid)4081 connector = self.driver.get_volume_connector(instance)4082 comp_ret, new_cinfo = self._swap_volume(context, instance,4083 bdm,4084 connector,4085 old_volume_id,4086 new_volume_id)4087 save_volume_id = comp_ret['save_volume_id']4088 mountpoint = bdm.device_name4089 # Update bdm4090 values = {4091 'connection_info': jsonutils.dumps(new_cinfo),4092 'delete_on_termination': False,4093 'source_type': 'volume',4094 'destination_type': 'volume',4095 'snapshot_id': None,4096 'volume_id': save_volume_id,4097 'volume_size': None,4098 'no_device': None}4099 bdm.update(values)4100 bdm.save()4101 self.volume_api.attach(context,4102 new_volume_id,4103 instance.uuid,4104 mountpoint)4105 # Remove old connection4106 self.volume_api.detach(context.elevated(), old_volume_id)4107 @wrap_exception()4108 def remove_volume_connection(self, context, volume_id, instance):4109 """Remove a volume connection using the volume api."""4110 # NOTE(vish): We don't want to actually mark the volume4111 # detached, or delete the bdm, just remove the4112 # connection from this host.4113 # NOTE(PhilDay): Can't use object_compat decorator here as4114 # instance is not the second parameter4115 if isinstance(instance, dict):4116 metas = ['metadata', 'system_metadata']4117 instance = objects.Instance._from_db_object(4118 context, objects.Instance(), instance,4119 expected_attrs=metas)4120 instance._context = context4121 try:4122 bdm = objects.BlockDeviceMapping.get_by_volume_id(4123 context, volume_id)4124 self._detach_volume(context, instance, bdm)4125 connector = self.driver.get_volume_connector(instance)4126 self.volume_api.terminate_connection(context, volume_id, connector)4127 except exception.NotFound:4128 pass4129 @object_compat4130 @wrap_exception()4131 @reverts_task_state4132 @wrap_instance_fault4133 def attach_interface(self, context, instance, network_id, port_id,4134 requested_ip):4135 """Use hotplug to add an network adapter to an instance."""4136 network_info = self.network_api.allocate_port_for_instance(4137 context, instance, port_id, network_id, requested_ip)4138 if len(network_info) != 1:4139 LOG.error(_('allocate_port_for_instance returned %(ports)s ports')4140 % dict(ports=len(network_info)))4141 raise exception.InterfaceAttachFailed(4142 instance_uuid=instance.uuid)4143 image_ref = instance.get('image_ref')4144 image_meta = compute_utils.get_image_metadata(4145 context, self.image_api, image_ref, instance)4146 self.driver.attach_interface(instance, image_meta, network_info[0])4147 return network_info[0]4148 @object_compat4149 @wrap_exception()4150 @reverts_task_state4151 @wrap_instance_fault4152 def detach_interface(self, context, instance, port_id):4153 """Detach an network adapter from an instance."""4154 network_info = instance.info_cache.network_info4155 condemned = None4156 for vif in network_info:4157 if vif['id'] == port_id:4158 condemned = vif4159 break4160 if condemned is None:4161 raise exception.PortNotFound(_("Port %s is not "4162 "attached") % port_id)4163 self.network_api.deallocate_port_for_instance(context, instance,4164 port_id)4165 self.driver.detach_interface(instance, condemned)4166 def _get_compute_info(self, context, host):4167 service = objects.Service.get_by_compute_host(context, host)4168 try:4169 return service.compute_node4170 except IndexError:4171 raise exception.NotFound(_("Host %s not found") % host)4172 @wrap_exception()4173 def check_instance_shared_storage(self, ctxt, instance, data):4174 """Check if the instance files are shared4175 :param context: security context4176 :param data: result of driver.check_instance_shared_storage_local4177 Returns True if instance disks located on shared storage and4178 False otherwise.4179 """4180 return self.driver.check_instance_shared_storage_remote(ctxt, data)4181 @wrap_exception()4182 @wrap_instance_fault4183 def check_can_live_migrate_destination(self, ctxt, instance,4184 block_migration, disk_over_commit):4185 """Check if it is possible to execute live migration.4186 This runs checks on the destination host, and then calls4187 back to the source host to check the results.4188 :param context: security context4189 :param instance: dict of instance data4190 :param block_migration: if true, prepare for block migration4191 :param disk_over_commit: if true, allow disk over commit4192 :returns: a dict containing migration info4193 """4194 src_compute_info = obj_base.obj_to_primitive(4195 self._get_compute_info(ctxt, instance.host))4196 dst_compute_info = obj_base.obj_to_primitive(4197 self._get_compute_info(ctxt, CONF.host))4198 dest_check_data = self.driver.check_can_live_migrate_destination(ctxt,4199 instance, src_compute_info, dst_compute_info,4200 block_migration, disk_over_commit)4201 migrate_data = {}4202 try:4203 migrate_data = self.compute_rpcapi.\4204 check_can_live_migrate_source(ctxt, instance,4205 dest_check_data)4206 finally:4207 self.driver.check_can_live_migrate_destination_cleanup(ctxt,4208 dest_check_data)4209 if 'migrate_data' in dest_check_data:4210 migrate_data.update(dest_check_data['migrate_data'])4211 return migrate_data4212 @wrap_exception()4213 @wrap_instance_fault4214 def check_can_live_migrate_source(self, ctxt, instance, dest_check_data):4215 """Check if it is possible to execute live migration.4216 This checks if the live migration can succeed, based on the4217 results from check_can_live_migrate_destination.4218 :param context: security context4219 :param instance: dict of instance data4220 :param dest_check_data: result of check_can_live_migrate_destination4221 :returns: a dict containing migration info4222 """4223 is_volume_backed = self.compute_api.is_volume_backed_instance(ctxt,4224 instance)4225 dest_check_data['is_volume_backed'] = is_volume_backed4226 return self.driver.check_can_live_migrate_source(ctxt, instance,4227 dest_check_data)4228 @object_compat4229 @wrap_exception()4230 @wrap_instance_fault4231 def pre_live_migration(self, context, instance, block_migration, disk,4232 migrate_data):4233 """Preparations for live migration at dest host.4234 :param context: security context4235 :param instance: dict of instance data4236 :param block_migration: if true, prepare for block migration4237 :param migrate_data: if not None, it is a dict which holds data4238 required for live migration without shared4239 storage.4240 """4241 block_device_info = self._get_instance_block_device_info(4242 context, instance, refresh_conn_info=True)4243 network_info = self._get_instance_nw_info(context, instance)4244 self._notify_about_instance_usage(4245 context, instance, "live_migration.pre.start",4246 network_info=network_info)4247 pre_live_migration_data = self.driver.pre_live_migration(context,4248 instance,4249 block_device_info,4250 network_info,4251 disk,4252 migrate_data)4253 # NOTE(tr3buchet): setup networks on destination host4254 self.network_api.setup_networks_on_host(context, instance,4255 self.host)4256 # Creating filters to hypervisors and firewalls.4257 # An example is that nova-instance-instance-xxx,4258 # which is written to libvirt.xml(Check "virsh nwfilter-list")4259 # This nwfilter is necessary on the destination host.4260 # In addition, this method is creating filtering rule4261 # onto destination host.4262 self.driver.ensure_filtering_rules_for_instance(instance,4263 network_info)4264 self._notify_about_instance_usage(4265 context, instance, "live_migration.pre.end",4266 network_info=network_info)4267 return pre_live_migration_data4268 @wrap_exception()4269 @wrap_instance_fault4270 def live_migration(self, context, dest, instance, block_migration,4271 migrate_data):4272 """Executing live migration.4273 :param context: security context4274 :param instance: a nova.objects.instance.Instance object4275 :param dest: destination host4276 :param block_migration: if true, prepare for block migration4277 :param migrate_data: implementation specific params4278 """4279 # NOTE(danms): since instance is not the first parameter, we can't4280 # use @object_compat on this method. Since this is the only example,4281 # we do this manually instead of complicating the decorator4282 if not isinstance(instance, obj_base.NovaObject):4283 expected = ['metadata', 'system_metadata',4284 'security_groups', 'info_cache']4285 instance = objects.Instance._from_db_object(4286 context, objects.Instance(), instance,4287 expected_attrs=expected)4288 # Create a local copy since we'll be modifying the dictionary4289 migrate_data = dict(migrate_data or {})4290 try:4291 if block_migration:4292 disk = self.driver.get_instance_disk_info(instance.name)4293 else:4294 disk = None4295 pre_migration_data = self.compute_rpcapi.pre_live_migration(4296 context, instance,4297 block_migration, disk, dest, migrate_data)4298 migrate_data['pre_live_migration_result'] = pre_migration_data4299 except Exception:4300 with excutils.save_and_reraise_exception():4301 LOG.exception(_LE('Pre live migration failed at %s'),4302 dest, instance=instance)4303 self._rollback_live_migration(context, instance, dest,4304 block_migration, migrate_data)4305 # Executing live migration4306 # live_migration might raises exceptions, but4307 # nothing must be recovered in this version.4308 self.driver.live_migration(context, instance, dest,4309 self._post_live_migration,4310 self._rollback_live_migration,4311 block_migration, migrate_data)4312 def _live_migration_cleanup_flags(self, block_migration, migrate_data):4313 """Determine whether disks or intance path need to be cleaned up after4314 live migration (at source on success, at destination on rollback)4315 Block migration needs empty image at destination host before migration4316 starts, so if any failure occurs, any empty images has to be deleted.4317 Also Volume backed live migration w/o shared storage needs to delete4318 newly created instance-xxx dir on the destination as a part of its4319 rollback process4320 :param block_migration: if true, it was a block migration4321 :param migrate_data: implementation specific data4322 :returns: (bool, bool) -- do_cleanup, destroy_disks4323 """4324 # NOTE(angdraug): block migration wouldn't have been allowed if either4325 # block storage or instance path were shared4326 is_shared_block_storage = not block_migration4327 is_shared_instance_path = not block_migration4328 if migrate_data:4329 is_shared_block_storage = migrate_data.get(4330 'is_shared_block_storage', is_shared_block_storage)4331 is_shared_instance_path = migrate_data.get(4332 'is_shared_instance_path', is_shared_instance_path)4333 # No instance booting at source host, but instance dir4334 # must be deleted for preparing next block migration4335 # must be deleted for preparing next live migration w/o shared storage4336 do_cleanup = block_migration or not is_shared_instance_path4337 destroy_disks = not is_shared_block_storage4338 return (do_cleanup, destroy_disks)4339 @wrap_exception()4340 @wrap_instance_fault4341 def _post_live_migration(self, ctxt, instance,4342 dest, block_migration=False, migrate_data=None):4343 """Post operations for live migration.4344 This method is called from live_migration4345 and mainly updating database record.4346 :param ctxt: security context4347 :param instance: instance dict4348 :param dest: destination host4349 :param block_migration: if true, prepare for block migration4350 :param migrate_data: if not None, it is a dict which has data4351 required for live migration without shared storage4352 """4353 LOG.info(_('_post_live_migration() is started..'),4354 instance=instance)4355 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(4356 ctxt, instance['uuid'])4357 # Cleanup source host post live-migration4358 block_device_info = self._get_instance_block_device_info(4359 ctxt, instance, bdms=bdms)4360 self.driver.post_live_migration(ctxt, instance, block_device_info,4361 migrate_data)4362 # Detaching volumes.4363 connector = self.driver.get_volume_connector(instance)4364 for bdm in bdms:4365 # NOTE(vish): We don't want to actually mark the volume4366 # detached, or delete the bdm, just remove the4367 # connection from this host.4368 # remove the volume connection without detaching from hypervisor4369 # because the instance is not running anymore on the current host4370 if bdm.is_volume:4371 self.volume_api.terminate_connection(ctxt, bdm.volume_id,4372 connector)4373 # Releasing vlan.4374 # (not necessary in current implementation?)4375 network_info = self._get_instance_nw_info(ctxt, instance)4376 self._notify_about_instance_usage(ctxt, instance,4377 "live_migration._post.start",4378 network_info=network_info)4379 # Releasing security group ingress rule.4380 self.driver.unfilter_instance(instance,4381 network_info)4382 migration = {'source_compute': self.host,4383 'dest_compute': dest, }4384 self.network_api.migrate_instance_start(ctxt,4385 instance,4386 migration)4387 destroy_vifs = False4388 try:4389 self.driver.post_live_migration_at_source(ctxt, instance,4390 network_info)4391 except NotImplementedError as ex:4392 LOG.debug(ex, instance=instance)4393 # For all hypervisors other than libvirt, there is a possibility4394 # they are unplugging networks from source node in the cleanup4395 # method4396 destroy_vifs = True4397 # Define domain at destination host, without doing it,4398 # pause/suspend/terminate do not work.4399 self.compute_rpcapi.post_live_migration_at_destination(ctxt,4400 instance, block_migration, dest)4401 do_cleanup, destroy_disks = self._live_migration_cleanup_flags(4402 block_migration, migrate_data)4403 if do_cleanup:4404 self.driver.cleanup(ctxt, instance, network_info,4405 destroy_disks=destroy_disks,4406 migrate_data=migrate_data,4407 destroy_vifs=destroy_vifs)4408 # NOTE(tr3buchet): tear down networks on source host4409 self.network_api.setup_networks_on_host(ctxt, instance,4410 self.host, teardown=True)4411 self.instance_events.clear_events_for_instance(instance)4412 # NOTE(timello): make sure we update available resources on source4413 # host even before next periodic task.4414 self.update_available_resource(ctxt)4415 self._notify_about_instance_usage(ctxt, instance,4416 "live_migration._post.end",4417 network_info=network_info)4418 LOG.info(_('Migrating instance to %s finished successfully.'),4419 dest, instance=instance)4420 LOG.info(_("You may see the error \"libvirt: QEMU error: "4421 "Domain not found: no domain with matching name.\" "4422 "This error can be safely ignored."),4423 instance=instance)4424 if CONF.vnc_enabled or CONF.spice.enabled or CONF.rdp.enabled:4425 if CONF.cells.enable:4426 self.cells_rpcapi.consoleauth_delete_tokens(ctxt,4427 instance['uuid'])4428 else:4429 self.consoleauth_rpcapi.delete_tokens_for_instance(ctxt,4430 instance['uuid'])4431 @object_compat4432 @wrap_exception()4433 @wrap_instance_fault4434 def post_live_migration_at_destination(self, context, instance,4435 block_migration):4436 """Post operations for live migration .4437 :param context: security context4438 :param instance: Instance dict4439 :param block_migration: if true, prepare for block migration4440 """4441 LOG.info(_('Post operation of migration started'),4442 instance=instance)4443 # NOTE(tr3buchet): setup networks on destination host4444 # this is called a second time because4445 # multi_host does not create the bridge in4446 # plug_vifs4447 self.network_api.setup_networks_on_host(context, instance,4448 self.host)4449 migration = {'source_compute': instance['host'],4450 'dest_compute': self.host, }4451 self.network_api.migrate_instance_finish(context,4452 instance,4453 migration)4454 network_info = self._get_instance_nw_info(context, instance)4455 self._notify_about_instance_usage(4456 context, instance, "live_migration.post.dest.start",4457 network_info=network_info)4458 block_device_info = self._get_instance_block_device_info(context,4459 instance)4460 self.driver.post_live_migration_at_destination(context, instance,4461 network_info,4462 block_migration, block_device_info)4463 # Restore instance state4464 current_power_state = self._get_power_state(context, instance)4465 node_name = None4466 try:4467 compute_node = self._get_compute_info(context, self.host)4468 node_name = compute_node.hypervisor_hostname4469 except exception.NotFound:4470 LOG.exception(_LE('Failed to get compute_info for %s'), self.host)4471 finally:4472 instance.host = self.host4473 instance.power_state = current_power_state4474 instance.vm_state = vm_states.ACTIVE4475 instance.task_state = None4476 instance.node = node_name4477 instance.save(expected_task_state=task_states.MIGRATING)4478 # NOTE(vish): this is necessary to update dhcp4479 self.network_api.setup_networks_on_host(context, instance, self.host)4480 self._notify_about_instance_usage(4481 context, instance, "live_migration.post.dest.end",4482 network_info=network_info)4483 @wrap_exception()4484 @wrap_instance_fault4485 def _rollback_live_migration(self, context, instance,4486 dest, block_migration, migrate_data=None):4487 """Recovers Instance/volume state from migrating -> running.4488 :param context: security context4489 :param instance: nova.db.sqlalchemy.models.Instance4490 :param dest:4491 This method is called from live migration src host.4492 This param specifies destination host.4493 :param block_migration: if true, prepare for block migration4494 :param migrate_data:4495 if not none, contains implementation specific data.4496 """4497 instance.vm_state = vm_states.ACTIVE4498 instance.task_state = None4499 instance.save(expected_task_state=[task_states.MIGRATING])4500 # NOTE(tr3buchet): setup networks on source host (really it's re-setup)4501 self.network_api.setup_networks_on_host(context, instance, self.host)4502 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(4503 context, instance['uuid'])4504 for bdm in bdms:4505 if bdm.is_volume:4506 self.compute_rpcapi.remove_volume_connection(4507 context, instance, bdm.volume_id, dest)4508 self._notify_about_instance_usage(context, instance,4509 "live_migration._rollback.start")4510 do_cleanup, destroy_disks = self._live_migration_cleanup_flags(4511 block_migration, migrate_data)4512 if do_cleanup:4513 self.compute_rpcapi.rollback_live_migration_at_destination(4514 context, instance, dest, destroy_disks=destroy_disks,4515 migrate_data=migrate_data)4516 self._notify_about_instance_usage(context, instance,4517 "live_migration._rollback.end")4518 @object_compat4519 @wrap_exception()4520 @wrap_instance_fault4521 def rollback_live_migration_at_destination(self, context, instance,4522 destroy_disks=True,4523 migrate_data=None):4524 """Cleaning up image directory that is created pre_live_migration.4525 :param context: security context4526 :param instance: a nova.objects.instance.Instance object sent over rpc4527 """4528 network_info = self._get_instance_nw_info(context, instance)4529 self._notify_about_instance_usage(4530 context, instance, "live_migration.rollback.dest.start",4531 network_info=network_info)4532 # NOTE(tr3buchet): tear down networks on destination host4533 self.network_api.setup_networks_on_host(context, instance,4534 self.host, teardown=True)4535 # NOTE(vish): The mapping is passed in so the driver can disconnect4536 # from remote volumes if necessary4537 block_device_info = self._get_instance_block_device_info(context,4538 instance)4539 self.driver.rollback_live_migration_at_destination(4540 context, instance, network_info, block_device_info,4541 destroy_disks=destroy_disks, migrate_data=migrate_data)4542 self._notify_about_instance_usage(4543 context, instance, "live_migration.rollback.dest.end",4544 network_info=network_info)4545 @periodic_task.periodic_task(4546 spacing=CONF.heal_instance_info_cache_interval)4547 def _heal_instance_info_cache(self, context):4548 """Called periodically. On every call, try to update the4549 info_cache's network information for another instance by4550 calling to the network manager.4551 This is implemented by keeping a cache of uuids of instances4552 that live on this host. On each call, we pop one off of a4553 list, pull the DB record, and try the call to the network API.4554 If anything errors don't fail, as it's possible the instance4555 has been deleted, etc.4556 """4557 heal_interval = CONF.heal_instance_info_cache_interval4558 if not heal_interval:4559 return4560 instance_uuids = getattr(self, '_instance_uuids_to_heal', [])4561 instance = None4562 LOG.debug('Starting heal instance info cache')4563 if not instance_uuids:4564 # The list of instances to heal is empty so rebuild it4565 LOG.debug('Rebuilding the list of instances to heal')4566 db_instances = objects.InstanceList.get_by_host(4567 context, self.host, expected_attrs=[], use_slave=True)4568 for inst in db_instances:4569 # We don't want to refersh the cache for instances4570 # which are building or deleting so don't put them4571 # in the list. If they are building they will get4572 # added to the list next time we build it.4573 if (inst.vm_state == vm_states.BUILDING):4574 LOG.debug('Skipping network cache update for instance '4575 'because it is Building.', instance=inst)4576 continue4577 if (inst.task_state == task_states.DELETING):4578 LOG.debug('Skipping network cache update for instance '4579 'because it is being deleted.', instance=inst)4580 continue4581 if not instance:4582 # Save the first one we find so we don't4583 # have to get it again4584 instance = inst4585 else:4586 instance_uuids.append(inst['uuid'])4587 self._instance_uuids_to_heal = instance_uuids4588 else:4589 # Find the next valid instance on the list4590 while instance_uuids:4591 try:4592 inst = objects.Instance.get_by_uuid(4593 context, instance_uuids.pop(0),4594 expected_attrs=['system_metadata', 'info_cache'],4595 use_slave=True)4596 except exception.InstanceNotFound:4597 # Instance is gone. Try to grab another.4598 continue4599 # Check the instance hasn't been migrated4600 if inst.host != self.host:4601 LOG.debug('Skipping network cache update for instance '4602 'because it has been migrated to another '4603 'host.', instance=inst)4604 # Check the instance isn't being deleting4605 elif inst.task_state == task_states.DELETING:4606 LOG.debug('Skipping network cache update for instance '4607 'because it is being deleted.', instance=inst)4608 else:4609 instance = inst4610 break4611 if instance:4612 # We have an instance now to refresh4613 try:4614 # Call to network API to get instance info.. this will4615 # force an update to the instance's info_cache4616 self._get_instance_nw_info(context, instance, use_slave=True)4617 LOG.debug('Updated the network info_cache for instance',4618 instance=instance)4619 except Exception:4620 LOG.error(_('An error occurred while refreshing the network '4621 'cache.'), instance=instance, exc_info=True)4622 else:4623 LOG.debug("Didn't find any instances for network info cache "4624 "update.")4625 @periodic_task.periodic_task4626 def _poll_rebooting_instances(self, context):4627 if CONF.reboot_timeout > 0:4628 filters = {'task_state': task_states.REBOOTING,4629 'host': self.host}4630 rebooting = objects.InstanceList.get_by_filters(4631 context, filters, expected_attrs=[], use_slave=True)4632 to_poll = []4633 for instance in rebooting:4634 if timeutils.is_older_than(instance['updated_at'],4635 CONF.reboot_timeout):4636 to_poll.append(instance)4637 self.driver.poll_rebooting_instances(CONF.reboot_timeout, to_poll)4638 @periodic_task.periodic_task4639 def _poll_rescued_instances(self, context):4640 if CONF.rescue_timeout > 0:4641 filters = {'vm_state': vm_states.RESCUED,4642 'host': self.host}4643 rescued_instances = objects.InstanceList.get_by_filters(4644 context, filters, expected_attrs=["system_metadata"],4645 use_slave=True)4646 to_unrescue = []4647 for instance in rescued_instances:4648 if timeutils.is_older_than(instance['launched_at'],4649 CONF.rescue_timeout):4650 to_unrescue.append(instance)4651 for instance in to_unrescue:4652 self.compute_api.unrescue(context, instance)4653 @periodic_task.periodic_task4654 def _poll_unconfirmed_resizes(self, context):4655 if CONF.resize_confirm_window == 0:4656 return4657 migrations = objects.MigrationList.get_unconfirmed_by_dest_compute(4658 context, CONF.resize_confirm_window, self.host,4659 use_slave=True)4660 migrations_info = dict(migration_count=len(migrations),4661 confirm_window=CONF.resize_confirm_window)4662 if migrations_info["migration_count"] > 0:4663 LOG.info(_("Found %(migration_count)d unconfirmed migrations "4664 "older than %(confirm_window)d seconds"),4665 migrations_info)4666 def _set_migration_to_error(migration, reason, **kwargs):4667 LOG.warn(_("Setting migration %(migration_id)s to error: "4668 "%(reason)s"),4669 {'migration_id': migration['id'], 'reason': reason},4670 **kwargs)4671 migration.status = 'error'4672 migration.save(context.elevated())4673 for migration in migrations:4674 instance_uuid = migration.instance_uuid4675 LOG.info(_("Automatically confirming migration "4676 "%(migration_id)s for instance %(instance_uuid)s"),4677 {'migration_id': migration.id,4678 'instance_uuid': instance_uuid})4679 expected_attrs = ['metadata', 'system_metadata']4680 try:4681 instance = objects.Instance.get_by_uuid(context,4682 instance_uuid, expected_attrs=expected_attrs,4683 use_slave=True)4684 except exception.InstanceNotFound:4685 reason = (_("Instance %s not found") %4686 instance_uuid)4687 _set_migration_to_error(migration, reason)4688 continue4689 if instance['vm_state'] == vm_states.ERROR:4690 reason = _("In ERROR state")4691 _set_migration_to_error(migration, reason,4692 instance=instance)4693 continue4694 # race condition: The instance in DELETING state should not be4695 # set the migration state to error, otherwise the instance in4696 # to be deleted which is in RESIZED state4697 # will not be able to confirm resize4698 if instance.task_state in [task_states.DELETING,4699 task_states.SOFT_DELETING]:4700 msg = ("Instance being deleted or soft deleted during resize "4701 "confirmation. Skipping.")4702 LOG.debug(msg, instance=instance)4703 continue4704 vm_state = instance['vm_state']4705 task_state = instance['task_state']4706 if vm_state != vm_states.RESIZED or task_state is not None:4707 reason = (_("In states %(vm_state)s/%(task_state)s, not "4708 "RESIZED/None") %4709 {'vm_state': vm_state,4710 'task_state': task_state})4711 _set_migration_to_error(migration, reason,4712 instance=instance)4713 continue4714 try:4715 self.compute_api.confirm_resize(context, instance,4716 migration=migration)4717 except Exception as e:4718 LOG.info(_("Error auto-confirming resize: %s. "4719 "Will retry later."),4720 e, instance=instance)4721 @compute_utils.periodic_task_spacing_warn("shelved_poll_interval")4722 @periodic_task.periodic_task(spacing=CONF.shelved_poll_interval)4723 def _poll_shelved_instances(self, context):4724 if CONF.shelved_offload_time <= 0:4725 return4726 filters = {'vm_state': vm_states.SHELVED,4727 'host': self.host}4728 shelved_instances = objects.InstanceList.get_by_filters(4729 context, filters=filters, expected_attrs=['system_metadata'],4730 use_slave=True)4731 to_gc = []4732 for instance in shelved_instances:4733 sys_meta = instance.system_metadata4734 shelved_at = timeutils.parse_strtime(sys_meta['shelved_at'])4735 if timeutils.is_older_than(shelved_at, CONF.shelved_offload_time):4736 to_gc.append(instance)4737 for instance in to_gc:4738 try:4739 instance.task_state = task_states.SHELVING_OFFLOADING4740 instance.save()4741 self.shelve_offload_instance(context, instance)4742 except Exception:4743 LOG.exception(_LE('Periodic task failed to offload instance.'),4744 instance=instance)4745 @periodic_task.periodic_task4746 def _instance_usage_audit(self, context):4747 if not CONF.instance_usage_audit:4748 return4749 if compute_utils.has_audit_been_run(context,4750 self.conductor_api,4751 self.host):4752 return4753 begin, end = utils.last_completed_audit_period()4754 instances = objects.InstanceList.get_active_by_window_joined(4755 context, begin, end, host=self.host,4756 expected_attrs=['system_metadata', 'info_cache', 'metadata'],4757 use_slave=True)4758 num_instances = len(instances)4759 errors = 04760 successes = 04761 LOG.info(_("Running instance usage audit for"4762 " host %(host)s from %(begin_time)s to "4763 "%(end_time)s. %(number_instances)s"4764 " instances."),4765 dict(host=self.host,4766 begin_time=begin,4767 end_time=end,4768 number_instances=num_instances))4769 start_time = time.time()4770 compute_utils.start_instance_usage_audit(context,4771 self.conductor_api,4772 begin, end,4773 self.host, num_instances)4774 for instance in instances:4775 try:4776 self.conductor_api.notify_usage_exists(4777 context, instance,4778 ignore_missing_network_data=False)4779 successes += 14780 except Exception:4781 LOG.exception(_LE('Failed to generate usage '4782 'audit for instance '4783 'on host %s'), self.host,4784 instance=instance)4785 errors += 14786 compute_utils.finish_instance_usage_audit(context,4787 self.conductor_api,4788 begin, end,4789 self.host, errors,4790 "Instance usage audit ran "4791 "for host %s, %s instances "4792 "in %s seconds." % (4793 self.host,4794 num_instances,4795 time.time() - start_time))4796 @compute_utils.periodic_task_spacing_warn("bandwidth_poll_interval")4797 @periodic_task.periodic_task(spacing=CONF.bandwidth_poll_interval)4798 def _poll_bandwidth_usage(self, context):4799 if (CONF.bandwidth_poll_interval <= 0 or not self._bw_usage_supported):4800 return4801 prev_time, start_time = utils.last_completed_audit_period()4802 curr_time = time.time()4803 if (curr_time - self._last_bw_usage_poll >4804 CONF.bandwidth_poll_interval):4805 self._last_bw_usage_poll = curr_time4806 LOG.info(_("Updating bandwidth usage cache"))4807 cells_update_interval = CONF.cells.bandwidth_update_interval4808 if (cells_update_interval > 0 and4809 curr_time - self._last_bw_usage_cell_update >4810 cells_update_interval):4811 self._last_bw_usage_cell_update = curr_time4812 update_cells = True4813 else:4814 update_cells = False4815 instances = objects.InstanceList.get_by_host(context,4816 self.host,4817 use_slave=True)4818 try:4819 bw_counters = self.driver.get_all_bw_counters(instances)4820 except NotImplementedError:4821 # NOTE(mdragon): Not all hypervisors have bandwidth polling4822 # implemented yet. If they don't it doesn't break anything,4823 # they just don't get the info in the usage events.4824 # NOTE(PhilDay): Record that its not supported so we can4825 # skip fast on future calls rather than waste effort getting4826 # the list of instances.4827 LOG.warning(_("Bandwidth usage not supported by hypervisor."))4828 self._bw_usage_supported = False4829 return4830 refreshed = timeutils.utcnow()4831 for bw_ctr in bw_counters:4832 # Allow switching of greenthreads between queries.4833 greenthread.sleep(0)4834 bw_in = 04835 bw_out = 04836 last_ctr_in = None4837 last_ctr_out = None4838 usage = objects.BandwidthUsage.get_by_instance_uuid_and_mac(4839 context, bw_ctr['uuid'], bw_ctr['mac_address'],4840 start_period=start_time, use_slave=True)4841 if usage:4842 bw_in = usage.bw_in4843 bw_out = usage.bw_out4844 last_ctr_in = usage.last_ctr_in4845 last_ctr_out = usage.last_ctr_out4846 else:4847 usage = (objects.BandwidthUsage.4848 get_by_instance_uuid_and_mac(4849 context, bw_ctr['uuid'], bw_ctr['mac_address'],4850 start_period=prev_time, use_slave=True))4851 if usage:4852 last_ctr_in = usage.last_ctr_in4853 last_ctr_out = usage.last_ctr_out4854 if last_ctr_in is not None:4855 if bw_ctr['bw_in'] < last_ctr_in:4856 # counter rollover4857 bw_in += bw_ctr['bw_in']4858 else:4859 bw_in += (bw_ctr['bw_in'] - last_ctr_in)4860 if last_ctr_out is not None:4861 if bw_ctr['bw_out'] < last_ctr_out:4862 # counter rollover4863 bw_out += bw_ctr['bw_out']4864 else:4865 bw_out += (bw_ctr['bw_out'] - last_ctr_out)4866 objects.BandwidthUsage.create(context,4867 bw_ctr['uuid'],4868 bw_ctr['mac_address'],4869 bw_in,4870 bw_out,4871 bw_ctr['bw_in'],4872 bw_ctr['bw_out'],4873 start_period=start_time,4874 last_refreshed=refreshed,4875 update_cells=update_cells)4876 def _get_host_volume_bdms(self, context, use_slave=False):4877 """Return all block device mappings on a compute host."""4878 compute_host_bdms = []4879 instances = objects.InstanceList.get_by_host(context, self.host)4880 for instance in instances:4881 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(4882 context, instance.uuid, use_slave=use_slave)4883 instance_bdms = [bdm for bdm in bdms if bdm.is_volume]4884 compute_host_bdms.append(dict(instance=instance,4885 instance_bdms=instance_bdms))4886 return compute_host_bdms4887 def _update_volume_usage_cache(self, context, vol_usages):4888 """Updates the volume usage cache table with a list of stats."""4889 for usage in vol_usages:4890 # Allow switching of greenthreads between queries.4891 greenthread.sleep(0)4892 self.conductor_api.vol_usage_update(context, usage['volume'],4893 usage['rd_req'],4894 usage['rd_bytes'],4895 usage['wr_req'],4896 usage['wr_bytes'],4897 usage['instance'])4898 @periodic_task.periodic_task(spacing=CONF.volume_usage_poll_interval)4899 def _poll_volume_usage(self, context, start_time=None):4900 if CONF.volume_usage_poll_interval == 0:4901 return4902 if not start_time:4903 start_time = utils.last_completed_audit_period()[1]4904 compute_host_bdms = self._get_host_volume_bdms(context,4905 use_slave=True)4906 if not compute_host_bdms:4907 return4908 LOG.debug("Updating volume usage cache")4909 try:4910 vol_usages = self.driver.get_all_volume_usage(context,4911 compute_host_bdms)4912 except NotImplementedError:4913 return4914 self._update_volume_usage_cache(context, vol_usages)4915 @compute_utils.periodic_task_spacing_warn("sync_power_state_interval")4916 @periodic_task.periodic_task(spacing=CONF.sync_power_state_interval,4917 run_immediately=True)4918 def _sync_power_states(self, context):4919 """Align power states between the database and the hypervisor.4920 To sync power state data we make a DB call to get the number of4921 virtual machines known by the hypervisor and if the number matches the4922 number of virtual machines known by the database, we proceed in a lazy4923 loop, one database record at a time, checking if the hypervisor has the4924 same power state as is in the database.4925 """4926 db_instances = objects.InstanceList.get_by_host(context,4927 self.host,4928 use_slave=True)4929 num_vm_instances = self.driver.get_num_instances()4930 num_db_instances = len(db_instances)4931 if num_vm_instances != num_db_instances:4932 LOG.warn(_("Found %(num_db_instances)s in the database and "4933 "%(num_vm_instances)s on the hypervisor."),4934 {'num_db_instances': num_db_instances,4935 'num_vm_instances': num_vm_instances})4936 def _sync(db_instance):4937 # NOTE(melwitt): This must be synchronized as we query state from4938 # two separate sources, the driver and the database.4939 # They are set (in stop_instance) and read, in sync.4940 @utils.synchronized(db_instance.uuid)4941 def query_driver_power_state_and_sync():4942 self._query_driver_power_state_and_sync(context, db_instance)4943 try:4944 query_driver_power_state_and_sync()4945 except Exception:4946 LOG.exception(_LE("Periodic sync_power_state task had an "4947 "error while processing an instance."),4948 instance=db_instance)4949 self._syncs_in_progress.pop(db_instance.uuid)4950 for db_instance in db_instances:4951 # process syncs asynchronously - don't want instance locking to4952 # block entire periodic task thread4953 uuid = db_instance.uuid4954 if uuid in self._syncs_in_progress:4955 LOG.debug('Sync already in progress for %s' % uuid)4956 else:4957 LOG.debug('Triggering sync for uuid %s' % uuid)4958 self._syncs_in_progress[uuid] = True4959 self._sync_power_pool.spawn_n(_sync, db_instance)4960 def _query_driver_power_state_and_sync(self, context, db_instance):4961 if db_instance.task_state is not None:4962 LOG.info(_LI("During sync_power_state the instance has a "4963 "pending task (%(task)s). Skip."),4964 {'task': db_instance.task_state}, instance=db_instance)4965 return4966 # No pending tasks. Now try to figure out the real vm_power_state.4967 try:4968 vm_instance = self.driver.get_info(db_instance)4969 vm_power_state = vm_instance['state']4970 except exception.InstanceNotFound:4971 vm_power_state = power_state.NOSTATE4972 # Note(maoy): the above get_info call might take a long time,4973 # for example, because of a broken libvirt driver.4974 try:4975 self._sync_instance_power_state(context,4976 db_instance,4977 vm_power_state,4978 use_slave=True)4979 except exception.InstanceNotFound:4980 # NOTE(hanlind): If the instance gets deleted during sync,4981 # silently ignore.4982 pass4983 def _sync_instance_power_state(self, context, db_instance, vm_power_state,4984 use_slave=False):4985 """Align instance power state between the database and hypervisor.4986 If the instance is not found on the hypervisor, but is in the database,4987 then a stop() API will be called on the instance.4988 """4989 # We re-query the DB to get the latest instance info to minimize4990 # (not eliminate) race condition.4991 db_instance.refresh(use_slave=use_slave)4992 db_power_state = db_instance.power_state4993 vm_state = db_instance.vm_state4994 if self.host != db_instance.host:4995 # on the sending end of nova-compute _sync_power_state4996 # may have yielded to the greenthread performing a live4997 # migration; this in turn has changed the resident-host4998 # for the VM; However, the instance is still active, it4999 # is just in the process of migrating to another host.5000 # This implies that the compute source must relinquish5001 # control to the compute destination.5002 LOG.info(_("During the sync_power process the "5003 "instance has moved from "5004 "host %(src)s to host %(dst)s") %5005 {'src': db_instance.host,5006 'dst': self.host},