When a virtual machine is created through _build_and_run_instance in /nova/compute/manager.py:ComputeManager, the claim mechanism is used to check whether the host currently has enough resources to build the instance.

def _build_and_run_instance(self, context, instance, image, injected_files,
        admin_password, requested_networks, security_groups,
        block_device_mapping, node, limits, filter_properties):

    image_name = image.get('name')
    self._notify_about_instance_usage(context, instance, 'create.start',
            extra_usage_info={'image_name': image_name})
    self._check_device_tagging(requested_networks, block_device_mapping)

    try:
        rt = self._get_resource_tracker(node)
        with rt.instance_claim(context, instance, limits):

As you can see, the rt object is obtained through _get_resource_tracker:

def _get_resource_tracker(self, nodename):
    rt = self._resource_tracker_dict.get(nodename)
    if not rt:
        if not self.driver.node_is_available(nodename):
            raise exception.NovaException(
                _("%s is not a valid node managed by this "
                  "compute host.") % nodename)

        rt = resource_tracker.ResourceTracker(self.host,
                                              self.driver,
                                              nodename)
        self._resource_tracker_dict[nodename] = rt
    return rt

_get_resource_tracker first checks whether _resource_tracker_dict already contains a tracker for this node. If not, it creates one with resource_tracker.ResourceTracker (defined in compute/resource_tracker.py; its constructor only does simple assignments) and adds the new tracker to _resource_tracker_dict.
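The per-node caching in _get_resource_tracker is a plain lazy-initialization pattern: look the node up in a dict, build the tracker on a miss, and reuse it on every later call. Below is a minimal standalone sketch of that pattern; NodeTracker, Manager and their names are made up for illustration and are not Nova code.

class NodeTracker(object):
    """Stand-in for ResourceTracker: just remembers which node it tracks."""
    def __init__(self, host, nodename):
        self.host = host
        self.nodename = nodename

class Manager(object):
    def __init__(self, host):
        self.host = host
        self._tracker_dict = {}  # nodename -> NodeTracker

    def _get_tracker(self, nodename):
        # Lazy initialization: build the tracker on first use, then cache it.
        tracker = self._tracker_dict.get(nodename)
        if tracker is None:
            tracker = NodeTracker(self.host, nodename)
            self._tracker_dict[nodename] = tracker
        return tracker

mgr = Manager('compute-1')
assert mgr._get_tracker('node-a') is mgr._get_tracker('node-a')  # same cached object

Because each node managed by the compute host gets exactly one tracker object, that tracker can accumulate resource usage across successive builds on the node.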
Continuing with instance_claim:

def instance_claim(self, context, instance, limits=None):
    """Indicate that some resources are needed for an upcoming compute
    instance build operation.

    This should be called before the compute node is about to perform
    an instance build operation that will consume additional resources.

    :param context: security context
    :param instance: instance to reserve resources for.
    :type instance: nova.objects.instance.Instance object
    :param limits: Dict of oversubscription limits for memory, disk,
                   and CPUs.
    :returns: A Claim ticket representing the reserved resources.  It can
              be used to revert the resource usage if an error occurs
              during the instance build.
    """
    if self.disabled:
        # compute_driver doesn't support resource tracking, just
        # set the 'host' and node fields and continue the build:
        self._set_instance_host_and_node(instance)
        return claims.NopClaim()

    # sanity checks:
    if instance.host:
        LOG.warning(_LW("Host field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance)

    if instance.node:
        LOG.warning(_LW("Node field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance)

    # get the overhead required to build this instance:
    overhead = self.driver.estimate_instance_overhead(instance)
    LOG.debug("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB", {'flavor': instance.flavor.memory_mb,
                     'overhead': overhead['memory_mb']})
    LOG.debug("Disk overhead for %(flavor)d GB instance; %(overhead)d "
              "GB", {'flavor': instance.flavor.root_gb,
                     'overhead': overhead.get('disk_gb', 0)})

    pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
        context, instance.uuid)
    claim = claims.Claim(context, instance, self, self.compute_node,
                         pci_requests, overhead=overhead, limits=limits)

    # self._set_instance_host_and_node() will save instance to the DB
    # so set instance.numa_topology first.  We need to make sure
    # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE
    # so that the resource audit knows about any cpus we've pinned.
    instance_numa_topology = claim.claimed_numa_topology
    instance.numa_topology = instance_numa_topology

    self._set_instance_host_and_node(instance)

    if self.pci_tracker:
        # NOTE(jaypipes): ComputeNode.pci_device_pools is set below
        # in _update_usage_from_instance().
        self.pci_tracker.claim_instance(context, pci_requests,
                                        instance_numa_topology)

    # Mark resources in-use and update stats
    self._update_usage_from_instance(context, instance)

    elevated = context.elevated()
    # persist changes to the compute node:
    self._update(elevated)

    return claim

The key step in instance_claim is the creation of the claim object:

    claim = claims.Claim(context, instance, self, self.compute_node,
                         pci_requests, overhead=overhead, limits=limits)
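Note that _build_and_run_instance uses the returned claim as a context manager (with rt.instance_claim(context, instance, limits):). In the Nova code of this era the base class NopClaim appears to implement __enter__/__exit__ so that an exception raised inside the with block aborts the claim and returns the reserved resources to the tracker. The sketch below only illustrates that revert-on-failure pattern with simplified, made-up classes; SimpleTracker and SimpleClaim are not Nova code.

class SimpleClaim(object):
    """Simplified claim: reserves an amount on creation, gives it back on abort."""
    def __init__(self, tracker, amount):
        self.tracker = tracker
        self.amount = amount
        tracker.used += amount  # resources are consumed as soon as the claim is made

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Only roll back if the build inside the with-block failed.
        if exc_type is not None:
            self.abort()

    def abort(self):
        self.tracker.used -= self.amount

class SimpleTracker(object):
    def __init__(self, total):
        self.total = total
        self.used = 0

    def claim(self, amount):
        if amount > self.total - self.used:
            raise RuntimeError('not enough resources')
        return SimpleClaim(self, amount)

tracker = SimpleTracker(total=8)
try:
    with tracker.claim(4):
        raise RuntimeError('build failed')  # simulate a failed instance build
except RuntimeError:
    pass
assert tracker.used == 0  # the failed build released its claim

This matches what the docstring above promises: the returned Claim is "a Claim ticket ... used to revert the resource usage if an error occurs during the instance build."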
""" if not limits: limits = {} # If an individual limit is None, the resource will be considered # unlimited: memory_mb_limit = limits.get('memory_mb') disk_gb_limit = limits.get('disk_gb') vcpus_limit = limits.get('vcpu') numa_topology_limit = limits.get('numa_topology') LOG.info(_LI("Attempting claim: memory %(memory_mb)d MB, " "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU"), {'memory_mb': self.memory_mb, 'disk_gb': self.disk_gb, 'vcpus': self.vcpus}, instance=self.instance) reasons = [self._test_memory(resources, memory_mb_limit), self._test_disk(resources, disk_gb_limit), self._test_vcpus(resources, vcpus_limit), self._test_numa_topology(resources, numa_topology_limit), self._test_pci()] reasons = [r for r in reasons if r is not None] if len(reasons) > 0: raise exception.ComputeResourcesUnavailable(reason= "; ".join(reasons)) LOG.info(_LI('Claim successful'), instance=self.instance)在_claim_test 中我們可以看到創(chuàng)建一個(gè)虛擬機(jī)主要check 四個(gè)方面的資源,分別是memory/disk/vcpu/numa。針對(duì)這四中資源分別調(diào)用self._test_memory/self._test_disk/self._test_vcpus/self._test_numa_topology/self._test_pci() 來check def _test_memory(self, resources, limit): type_ = _("memory") unit = "MB" total = resources.memory_mb used = resources.memory_mb_used requested = self.memory_mb return self._test(type_, unit, total, used, requested, limit) def _test(self, type_, unit, total, used, requested, limit): """Test if the given type of resource needed for a claim can be safely allocated. """ LOG.info(_LI('Total %(type)s: %(total)d %(unit)s, used: %(used).02f ' '%(unit)s'), {'type': type_, 'total': total, 'unit': unit, 'used': used}, instance=self.instance) if limit is None: # treat resource as unlimited: LOG.info(_LI('%(type)s limit not specified, defaulting to ' 'unlimited'), {'type': type_}, instance=self.instance) return free = limit - used # Oversubscribed resource policy info: LOG.info(_LI('%(type)s limit: %(limit).02f %(unit)s, ' 'free: %(free).02f %(unit)s'), {'type': type_, 'limit': limit, 'free': free, 'unit': unit}, instance=self.instance) if requested > free: return (_('Free %(type)s %(free).02f ' '%(unit)s < requested %(requested)d %(unit)s') % {'type': type_, 'free': free, 'unit': unit, 'requested': requested})原來memory的check 就是用total -used 是否大于request的。可見僅僅是比較大小而已 def _test_disk(self, resources, limit): type_ = _("disk") unit = "GB" total = resources.local_gb used = resources.local_gb_used requested = self.disk_gb return self._test(type_, unit, total, used, requested, limit) def _test_vcpus(self, resources, limit): type_ = _("vcpu") unit = "VCPU" total = resources.vcpus used = resources.vcpus_used requested = self.vcpus return self._test(type_, unit, total, used, requested, limit)disk和cpu也是一樣的算法。即看剩下的是否滿足要求的。 def _test_pci(self): pci_requests = self._pci_requests if pci_requests.requests: stats = self.tracker.pci_tracker.stats if not stats.support_requests(pci_requests.requests): return _('Claim pci failed.')而pci的檢測(cè)主要是看當(dāng)前的tracker.pci_tracker.stats.support_requests 是否包含pci_requests.requests,并沒有數(shù)量的比較