Best Python code snippet using lisa_python
dpdkutil.py
Source:dpdkutil.py  
...
    nics_count = len(node.nics.get_upper_nics())
    numa_nodes = node.tools[Lscpu].get_numa_node_count()
    request_pages_2mb = (nics_count - 1) * 1024 * numa_nodes
    request_pages_1gb = (nics_count - 1) * numa_nodes
    memfree_2mb = meminfo.get_free_memory_mb()
    memfree_1mb = meminfo.get_free_memory_gb()
    # request 2GiB of 2MiB pages and one 1GiB page per nic.
    # check there is enough memory on the device first;
    # default to enough for one nic if not enough is available.
    # this should be fine for tests on smaller SKUs
    if memfree_2mb < request_pages_2mb:
        node.log.debug(
            "WARNING: Not enough 2MB pages available for DPDK! "
            f"Requesting {request_pages_2mb} found {memfree_2mb} free. "
            "Test may fail if it cannot allocate memory."
        )
        request_pages_2mb = 1024
    if memfree_1mb < (request_pages_1gb * 2):  # account for 2MB pages by doubling ask
        node.log.debug(...
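A quick sanity check of the sizing math above (illustrative numbers, not part of dpdkutil.py): with 2 NICs and 2 NUMA nodes, the request works out to 2048 2MiB pages (4GiB) plus two 1GiB pages.

nics_count, numa_nodes = 2, 2
request_pages_2mb = (nics_count - 1) * 1024 * numa_nodes  # 2048 pages == 4GiB
request_pages_1gb = (nics_count - 1) * numa_nodes         # 2 pages == 2GiB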
utils.py
Source:utils.py
import os
from typing import Tuple, List

import torch
import torch.nn as nn
from PIL import Image
import numpy as np
import multiprocessing
from diffusers import StableDiffusionPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import (
    StableDiffusionSafetyChecker,
)
from transformers import CLIPFeatureExtractor
from transformers.feature_extraction_utils import BatchFeature


def image_grid(imgs, rows, cols):
    assert len(imgs) == rows * cols
    w, h = imgs[0].size
    grid = Image.new("RGB", size=(cols * w, rows * h))
    for i, img in enumerate(imgs):
        grid.paste(img, box=(i % cols * w, i // cols * h))
    return grid


def dummy_checker(images, *args, **kwargs):
    # removes the nsfw filter
    return images, False


def dummy_extractor(images, return_tensors="pt"):
    if type(images) is list:
        images = [np.array(img) for img in images]
    data = {"pixel_values": images}
    return BatchFeature(data=data, tensor_type=return_tensors)


def remove_nsfw(
    model: StableDiffusionPipeline,
) -> Tuple[StableDiffusionSafetyChecker, CLIPFeatureExtractor]:
    nsfw_model: StableDiffusionSafetyChecker = model.safety_checker
    if isinstance(nsfw_model, StableDiffusionSafetyChecker):
        nsfw_model = nsfw_model.cpu()
    model.safety_checker = dummy_checker
    extr = model.feature_extractor
    model.feature_extractor = dummy_extractor
    return nsfw_model, extr
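# Illustrative usage (not part of the original file): disable the NSFW filter
# on a loaded pipeline; the originals are returned so they can be restored later.
# The model id below is only an example.
#   pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
#   nsfw_model, extractor = remove_nsfw(pipe)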
def get_gpu_setting(env_var: str) -> Tuple[bool, List[int]]:
    # reads user input, returns multi_gpu flag and gpu id(s)
    if not torch.cuda.is_available():
        print("GPU not detected! Make sure you have a GPU to reduce inference time!")
        return False, []
    n = torch.cuda.device_count()
    if env_var == "all":
        gpus = list(range(n))
    elif "," in env_var:
        gpus = [int(gnum) for gnum in env_var.split(",") if int(gnum) < n]
    else:
        gpus = [int(env_var)]
    assert len(
        gpus
    ), f"Make sure to provide valid device ids! You have {n} GPU(s), you can specify the following values: {list(range(n))}"
    return len(gpus) > 1, gpus


def get_free_memory_Mb(device: int):
    # free device memory in MiB (mem_get_info returns (free, total) in bytes)
    return torch.cuda.mem_get_info(device)[0] / 2**20


def model_size_Mb(model):
    # from the legend @ptrblck himself https://discuss.pytorch.org/t/finding-model-size/130275/2
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()
    return (param_size + buffer_size) / 1024**2


class ToGPUWrapper(nn.Module):
    def __init__(self, layer: nn.Module, device: torch.device) -> None:
        # composition design: we wrap an nn.Module and change its forward
        super().__init__()
        self.device = device
        # move the wrapped model to the correct device
        self.layer = layer.to(device)

    def forward(self, x: torch.Tensor, *args, **kwargs):
        # move inputs and outputs to the given device
        args = [a.to(self.device) if type(a) is torch.Tensor else a for a in args]
        for k in kwargs:
            if type(kwargs[k]) is torch.Tensor:
                kwargs[k] = kwargs[k].to(self.device)
        y = self.layer(x.to(self.device), *args, **kwargs)
        # the text model wraps its output.. this could be made more generic
        if self.layer.__class__.__name__ == "CLIPTextModel":
            # item access on the output does something like self.to_tuple()[k]
            y.last_hidden_state = y.last_hidden_state.to(self.device)
            return y
        return y.to(self.device)

    # FIXME this is giving recursion problems
    # def __getattr__(self, name: str):
    #     return getattr(self.layer, name)

    def __iter__(self):
        return iter(self.layer)

    def __next__(self):
        return next(self.layer)

    def decode(self, z):
        # for vae output
        return self.layer.decode(z.to(self.device))
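# Illustrative sketch (not part of the original file): ToGPUWrapper pins a single
# pipeline component to one device, e.g. the VAE to the second GPU; forward()
# then moves tensor inputs and outputs onto that device.
#   model.vae = ToGPUWrapper(model.vae, torch.device("cuda", 1))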
class ModelParts2GPUsAssigner:
    def __init__(
        self,
        devices: List[int],
    ) -> None:
        """
        Finds a valid assignment of model parts (unet, vae, ...) to the available
        GPUs using a stochastic brute-force approach. The problem is formulated
        as an Integer Linear Programming one:

            maximize w^T X,  with w = [a, b, c, d]
            subject to
                x_1 a + y_1 b + z_1 c + k_1 d <= v_1
                ...
                x_n a + y_n b + z_n c + k_n d <= v_n
                sum(x_i) = sum(y_i) = sum(z_i) = sum(k_i)
                x, y, z, k >= 0
                x, y, z, k in Z^n

        `self.W` represents the memory requirements of each component the model
        is split into.
        `self.G` is a vector of size N containing the available memory of each
        device. Available memory is conservatively taken as 60% of the free memory.
        The assignment state I is an Nx4 matrix where I[i, j] is the number of
        components j assigned to GPU i (initially 0).
        """
        self.N = len(devices)
        # memory "budget" for each device: we consider 60% of the available GPU memory
        # so that the rest can be used for storing intermediate results
        # TODO unet uses way more than the other components, optimize to do inference on 512x512
        G = [int(get_free_memory_Mb(d) * 0.6) for d in devices]
        print("Free GPU memory (per device): ", G)
        # FIXME G is kind of a function of n_models itself, as the more models you have
        # the more memory you will be using for storing intermediate results...
        self.G = np.array(G, dtype=np.uint16)
        # model component memory usage, fixed order: unet_e, unet_d, text_encoder, vae
        # TODO make dynamic using `model_size_Mb(model.text_encoder)`
        fp16 = bool(int(os.environ.get("FP16", 1)))
        if fp16:
            self.W = np.array([666, 975, 235, 160])
        else:
            # fp32 weights
            self.W = np.array([1331, 1949, 470, 320])
        # parse through `int` so that SINGLE_MODEL_PARALLEL=0 reads as False
        single_model = bool(int(os.environ.get("SINGLE_MODEL_PARALLEL", 0)))
        # easy way to force a single model over multiple gpus, useful for debugging
        if single_model:
            self._max_models = 1
        else:
            # max number of models you can host considering the pooled VRAM as if it
            # were a single GPU, "upper bounded" by the max number of processes
            self._max_models = min(
                multiprocessing.cpu_count(), np.floor(self.G.sum() / self.W.sum())
            )
        if np.floor(self.G.sum() / self.W.sum()) == 0:
            raise Exception(
                "You don't have enough combined VRAM to host a single model! "
                "Try to run the container using the FP16 mode."
            )

    def state_evaluation(self, state: np.ndarray):
        """
        2 conditions:
            - each model component must appear the same number of times
              (implicitly guaranteed by how states are generated)
            - the allocation on each GPU must not exceed its capacity
        """
        return (state @ self.W <= self.G).all()
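    # Worked example for state_evaluation (hypothetical numbers, not part of the
    # original file): with N=2 GPUs, budgets G=[4000, 3000] MiB and fp16 weights
    # W=[666, 975, 235, 160], the state
    #     [[1, 0, 1, 0],
    #      [0, 1, 0, 1]]
    # gives state @ W = [666 + 235, 975 + 160] = [901, 1135], within budget on
    # both devices, so the check returns True (one model split across two GPUs).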
    def add_model(
        self, state: np.ndarray, rnd=True, sample_size=2
    ) -> List[np.ndarray]:
        """
        Takes an assignment state and tries to add a "model" to it: adding a
        model means assigning *each of the 4 components* to a device. It does so
        by brute-force searching for valid assignments that support the addition
        of another model. If no such assignment exists, an empty list is returned.

        Args:
            state (np.ndarray): The initial state the search starts from.
            rnd (bool, optional): Whether to generate new assignments in a random
                fashion, rather than proceeding "linearly". Defaults to True.
            sample_size (int, optional): The number of valid assignments needed to
                interrupt the search before the whole space is visited. Defaults to 2.
        """

        def get_device_permutation():
            if rnd:
                return np.random.permutation(self.N)
            return np.arange(self.N)

        # beware, this will modify `state` in-place
        valid = []
        # N^4 possible combinations:
        # plus one on cells (a, 0), (b, 1), (c, 2), (d, 3)
        for a in get_device_permutation():
            state[a, 0] += 1
            for b in get_device_permutation():
                state[b, 1] += 1
                for c in get_device_permutation():
                    state[c, 2] += 1
                    for d in get_device_permutation():
                        state[d, 3] += 1
                        # evaluate state; return the first valid one or keep a list of
                        # valid ones? Or the one with max "score"? Greedily returning
                        # one can't guarantee finding (one of the) optimum(s)
                        if self.state_evaluation(state):
                            # could be compressed by only storing a, b, c, d..
                            valid.append(state.copy())
                        # note: state isn't backtracked on this early return!
                        if sample_size > 0 and len(valid) >= sample_size:
                            return valid
                        # backtrack!
                        state[d, 3] -= 1
                    state[c, 2] -= 1
                state[b, 1] -= 1
            state[a, 0] -= 1
        return valid
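    # Illustrative trace (not part of the original file): starting from an empty
    # 2x4 state with rnd=False, the first full descent of the nested loops adds
    # one unit per column on GPU 0,
    #     [[1, 1, 1, 1],
    #      [0, 0, 0, 0]]
    # i.e. a whole extra model on device 0; up to `sample_size` states passing
    # state_evaluation are collected and returned.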
    def find_best_assignment(
        self, state: np.ndarray, curr_n_models: int, **kwargs
    ) -> Tuple[int, List[np.ndarray]]:
        """
        Starting from the initial empty assignment, tries to add a model to the
        multi-gpu setup recursively, stopping whenever this is impossible.
        """
        if curr_n_models >= self._max_models:
            return -1, []
        prev = state.copy()
        valid = self.add_model(state, **kwargs)
        # can't generate valid assignments with an extra model, return the current one
        if not len(valid):
            return curr_n_models, [prev]
        # visit children
        children = []
        for next_state in valid:
            # recurse only into valid states
            depth, ss = self.find_best_assignment(
                next_state, curr_n_models + 1, **kwargs
            )
            if depth > 0 and len(ss):
                children.append((depth, ss))
        # can't add more models
        if not len(children):
            return curr_n_models + 1, valid
        # return the best child, the one that assigns the most models
        return max(children, key=lambda t: t[0])

    def __call__(self) -> np.ndarray:
        # initial empty assignment, #GPUs x #model_parts
        I = np.zeros((self.N, 4), dtype=np.uint16)
        # find a valid assignment of the split components to devices
        n_models, ass = self.find_best_assignment(I, 0)
        ass = ass[0]
        print(
            f"Search has found that {n_models} model(s) can be split over {self.N} device(s)!"
        )
        # format output into a [{model_component -> device}], one per model to create
        model_ass = [{i: -1 for i in range(4)} for _ in range(n_models)]
        for comp in range(4):
            for dev in range(self.N):
                # this might say "component_0 to device_1 3 times"
                for _ in range(ass[dev, comp]):
                    for m in model_ass:
                        # assign to a model that doesn't have this component allocated yet
                        if m[comp] == -1:
                            m[comp] = dev
...
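A minimal usage sketch, wiring get_gpu_setting to the assigner (illustrative, not part of utils.py: the DEVICES variable name and the exact shape of __call__'s return value are assumptions, since the snippet above is truncated):

import os
# e.g. DEVICES="all", "0" or "0,1"
multi_gpu, gpus = get_gpu_setting(os.environ.get("DEVICES", "all"))
assigner = ModelParts2GPUsAssigner(gpus)
# expected: one {component_index -> device_id} mapping per hostable model replica
assignments = assigner()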
free.py
Source:free.py
...
        raise LisaException(f"Failed to get info for field {field_name}")

    def get_swap_size(self) -> int:
        # return total swap size in Mebibytes (KiB value >> 10)
        return self._get_field_bytes_kib("Swap", "total") >> 10

    def get_free_memory_mb(self) -> int:
        # free memory in MiB (KiB value >> 10)
        return self._get_field_bytes_kib("Mem", "free") >> 10

    def get_free_memory_gb(self) -> int:
        # free memory in GiB (KiB value >> 20)
        return self._get_field_bytes_kib("Mem", "free") >> 20

    def get_total_memory(self) -> str:
        """
        Returns total memory in powers of 1000, with unit.
        Example: 20G
        """
        # Example output of `free -h --si`:
        #               total        used        free      shared  buff/cache   available
        # Mem:           9.0G        4.6G        751M         74M        3.7G        4.0G
        # Swap:           0B          0B          0B
        output = self.run("-h --si", shell=True).stdout
        group = find_group_in_lines(output, self._mem_pattern)
...
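A hedged usage sketch of this Free tool from a LISA test, mirroring how dpdkutil.py above calls meminfo.get_free_memory_mb(); the import path and the surrounding node fixture are assumptions:

from lisa.tools import Free  # assumed import path

meminfo = node.tools[Free]  # `node` comes from the LISA test context (assumed)
free_mb = meminfo.get_free_memory_mb()  # free memory in MiB
free_gb = meminfo.get_free_memory_gb()  # free memory in GiB
swap_mb = meminfo.get_swap_size()       # total swap in MiB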
