Best Python code snippet using lisa_python
kdumpcrash.py
Source:kdumpcrash.py  
# NOTE: the definitions below are methods of a test-suite class whose `class`
# header lies above this excerpt (hence the `self` parameter). They are written
# at the left margin only because that header is not visible here.
#
# The decorator of `kdumpcrash_validate_smp` also opens above the excerpt (only
# its closing parentheses are visible), so it is not reproduced.
def kdumpcrash_validate_smp(self, node: Node, log_path: Path, log: Logger) -> None:
    """Run the kdump test with the crash triggered on cpu 1."""
    self._trigger_kdump_on_specified_cpu(1, node, log_path, log)


@TestCaseMetadata(
    description="""
        This test case verifies if the kdump is effect when VM has any cores, and
        trigger kdump on the random cpu.
        The test steps are same as `kdumpcrash_validate_single_core`.
        """,
    priority=2,
)
def kdumpcrash_validate_on_random_cpu(
    self, node: Node, log_path: Path, log: Logger
) -> None:
    """Run the kdump test with the crash triggered on a randomly chosen cpu."""
    lscpu = node.tools[Lscpu]
    cpu_count = lscpu.get_core_count()
    # randint is inclusive on both ends, so the upper bound is cpu_count - 1.
    cpu_num = randint(0, cpu_count - 1)
    self._trigger_kdump_on_specified_cpu(cpu_num, node, log_path, log)


@TestCaseMetadata(
    description="""
        This test case verifies if the kdump is effect when VM has 33~192 cpus and
        trigger kdump on the 33th cpu(cpu32), which is designed by a known issue.
        The test steps are same as `kdumpcrash_validate_single_core`.
        """,
    priority=2,
    requirement=node_requirement(
        node=schema.NodeSpace(core_count=search_space.IntRange(min=33, max=192))
    ),
)
def kdumpcrash_validate_on_cpu32(
    self, node: Node, log_path: Path, log: Logger
) -> None:
    """Run the kdump test with the crash triggered on cpu 32."""
    self._trigger_kdump_on_specified_cpu(32, node, log_path, log)


@TestCaseMetadata(
    description="""
        This test case verifies if the kdump is effect when VM has 193~415 cpus, and
        trigger kdump on the 193th cpu(cpu192), which is designed by a known issue.
        The test steps are same as `kdumpcrash_validate_single_core`.
        """,
    priority=2,
    requirement=node_requirement(
        node=schema.NodeSpace(core_count=search_space.IntRange(min=193, max=415))
    ),
)
def kdumpcrash_validate_on_cpu192(
    self, node: Node, log_path: Path, log: Logger
) -> None:
    """Run the kdump test with the crash triggered on cpu 192."""
    self._trigger_kdump_on_specified_cpu(192, node, log_path, log)


@TestCaseMetadata(
    description="""
        This test case verifies if the kdump is effect when VM has more than 415 cpus,
        and trigger kdump on the 416th cpu(cpu415), which is designed by a known issue.
        The test steps are same as `kdumpcrash_validate_single_core`.
        """,
    priority=2,
    requirement=node_requirement(
        node=schema.NodeSpace(core_count=search_space.IntRange(min=416))
    ),
)
def kdumpcrash_validate_on_cpu415(
    self, node: Node, log_path: Path, log: Logger
) -> None:
    """Run the kdump test with the crash triggered on cpu 415."""
    self._trigger_kdump_on_specified_cpu(415, node, log_path, log)


@TestCaseMetadata(
    description="""
        This test case verifies if the kdump is effect when crashkernel is set auto.
        The test steps are same as `kdumpcrash_validate_single_core`.
        """,
    priority=2,
)
def kdumpcrash_validate_auto_size(
    self, node: Node, log_path: Path, log: Logger
) -> None:
    """Run the kdump test with `self.crash_kernel` set to "auto"."""
    self.crash_kernel = "auto"
    self._kdump_test(node, log_path, log)


@TestCaseMetadata(
    description="""
        This test case verifies if the kdump is effect when crashkernel is set auto and
        the memory is more than 2T. With the crashkernel=auto parameter, system will
        reserved a suitable size memory for crash kernel. We want to see if the
        crashkernel=auto can also handle this scenario when the system memory is large.
        The test steps are same as `kdumpcrash_validate_single_core`.
        """,
    priority=2,
    requirement=node_requirement(
        node=schema.NodeSpace(memory_mb=search_space.IntRange(min=2097152)),
    ),
)
def kdumpcrash_validate_large_memory_auto_size(
    self, node: Node, log_path: Path, log: Logger
) -> None:
    """Run the kdump test with `self.crash_kernel` set to "auto".

    The body is the same as `kdumpcrash_validate_auto_size`; only the node
    requirement in the decorator differs (memory_mb of at least 2097152).
    """
    self.crash_kernel = "auto"
    self._kdump_test(node, log_path, log)


def _check_supported(self, node: Node) -> None:
    """Skip the test when the node cannot support the requested kdump setup.

    Raises:
        SkippedException: if the negotiated VMBus version is below 3.0.0, or if
            `self.crash_kernel` is "auto" on a node where that is not supported.
    """
    # Check the kernel config for kdump support.
    kdump = node.tools[KdumpBase]
    kdump.check_required_kernel_config()

    # Check the VMBus version for kdump support.
    # NOTE(review): the version is compared against a string literal; presumably
    # the returned version object supports that comparison - confirm.
    dmesg = node.tools[Dmesg]
    vmbus_version = dmesg.get_vmbus_version()
    if vmbus_version < "3.0.0":
        raise SkippedException(
            f"No negotiated VMBus version {vmbus_version}. "
            "Kernel might be old or patches not included. "
            "Full support for kdump is not present."
        )

    # Below code aims to check the kernel config for "auto crashkernel" support.
    # Redhat/Centos has this "auto crashkernel" feature. For version 7, it needs
    # CONFIG_KEXEC_AUTO_RESERVE. For version 8, the ifdef of that config is
    # removed. For these changes we can refer to the Centos kernel, obtained
    # according to
    # https://wiki.centos.org/action/show/Sources?action=show&redirect=sources
    # In addition, we didn't see that the upstream kernel has the auto
    # crashkernel feature. It may be a patch owned by Redhat/Centos.
    # Note that the crashkernel=auto option in the boot command line is no
    # longer supported on RHEL 9 and later releases.
    if not (
        isinstance(node.os, Redhat)
        and node.os.information.version >= "8.0.0-0"
        and node.os.information.version < "9.0.0-0"
    ):
        if self.crash_kernel == "auto" and not node.tools[KernelConfig].is_built_in(
            "CONFIG_KEXEC_AUTO_RESERVE"
        ):
            raise SkippedException("crashkernel=auto doesn't work for the distro.")


def _get_resource_disk_dump_path(self, node: Node) -> str:
    """Create `<mount point>/crash` on the node and return its path.

    The mount point is "/mnt" when both cloud-init paths checked below exist;
    otherwise it is whatever the Waagent tool reports as the resource disk
    mount point.
    """
    if node.shell.exists(
        PurePosixPath("/var/log/cloud-init.log")
    ) and node.shell.exists(PurePosixPath("/var/lib/cloud/instance")):
        mount_point = "/mnt"
    else:
        mount_point = node.tools[Waagent].get_resource_disk_mount_point()
    dump_path = f"{mount_point}/crash"
    node.execute(
        f"mkdir -p {dump_path}",
        expected_exit_code=0,
        expected_exit_code_failure_message=(f"Fail to create dir {dump_path}"),
        shell=True,
        sudo=True,
    )
    return dump_path


def _kdump_test(self, node: Node, log_path: Path, log: Logger) -> None:
    """Configure kdump, trigger a crash and check that a vmcore was produced.

    Reads `self.crash_kernel` and `self.trigger_kdump_cmd`. On nodes reporting
    more than 1T of memory it overwrites `self.crash_kernel` and
    `self.timeout_of_dump_crash`.

    Raises:
        SkippedException: if `_check_supported` skips, or raises
            UnsupportedDistroException.
    """
    try:
        self._check_supported(node)
    except UnsupportedDistroException as identifier:
        # Chain explicitly so the original error stays attached as the cause.
        raise SkippedException(identifier) from identifier

    kdump = node.tools[KdumpBase]
    free = node.tools[Free]
    total_memory = free.get_total_memory()
    # The memory figure is only parsed when it carries a "T" marker; parse once.
    memory_in_tb = float(total_memory.strip("T")) if "T" in total_memory else None
    if memory_in_tb is not None and memory_in_tb > 1:
        # System memory is more than 1T, need to change the dump path
        # and set crashkernel=2G
        kdump.config_resource_disk_dump_path(
            self._get_resource_disk_dump_path(node)
        )
        self.crash_kernel = "2G"
        self.timeout_of_dump_crash = 1200
        if memory_in_tb > 6:
            self.timeout_of_dump_crash = 2000

    kdump.config_crashkernel_memory(self.crash_kernel)
    kdump.enable_kdump_service()

    # Cleaning up any previous crash dump files
    node.execute(
        f"mkdir -p {kdump.dump_path} && rm -rf {kdump.dump_path}/*",
        shell=True,
        sudo=True,
    )

    # Reboot system to make kdump take effect
    node.reboot()

    # Confirm that the kernel dump mechanism is enabled
    kdump.check_crashkernel_loaded(self.crash_kernel)

    # Activate the magic SysRq option
    echo = node.tools[Echo]
    echo.write_to_file("1", node.get_pure_path("/proc/sys/kernel/sysrq"), sudo=True)
    node.execute("sync", shell=True, sudo=True)

    try:
        # Trigger kdump. After the trigger command runs, the VM is expected to
        # be disconnected, so the call is bounded with a timeout of 10.
        node.execute(
            self.trigger_kdump_cmd,
            shell=True,
            sudo=True,
            timeout=10,
        )
    except Exception as identifier:
        # Deliberately best-effort: losing the connection here is expected.
        log.debug(f"ignorable ssh exception: {identifier}")

    # Check if the vmcore file is generated after triggering a crash
    self._check_kdump_result(node, log_path, log, kdump)

    # We should clean up the vmcore file since the test is passed
    node.execute(f"rm -rf {kdump.dump_path}/*", shell=True, sudo=True)


def _check_kdump_result(
    self, node: Node, log_path: Path, log: Logger, kdump: KdumpBase
) -> None:
    """Wait for the VM to come back and verify that a dump file was written.

    Steps:
    1. Try to connect to the VM.
    2. If connected:
       1) Check if the dump file is generated. If so, leave the loop; the
          test is passed.
       2) If there is no dump file, check for the incomplete file (while
          dumping hasn't completed, the dump file is named "*incomplete").
            a. If there is no incomplete file either, raise an exception
               once the retries are used up.
            b. If there is an incomplete file, check whether its size is
               growing. If so, keep checking until the dump completes, the
               incomplete file stops growing, or the timeout is hit.
    3. The VM may be connectable just as the crash kernel boots up. While
       dumping, or rebooting after the dump completes, the VM might be
       disconnected. That exception is caught, the connection is retried and
       the same steps are followed again.

    Raises:
        LisaException: if the dump stays incomplete, no dump file is found,
            the dump times out, or the VM cannot be reached in time.
    """
    timer = create_timer()
    remote_node = cast(RemoteNode, node)
    system_disconnected = True
    serial_console = node.features[SerialConsole]
    # Bound before the loops: the timeout message below formats this value even
    # when no "*incomplete*" file was ever seen, which used to fail with
    # UnboundLocalError instead of the intended LisaException.
    incomplete_file_size = 0

    while system_disconnected and timer.elapsed(False) < self.timeout_of_dump_crash:
        try:
            try_connect(remote_node._connection_info)
        except Exception as identifier:
            log.debug(
                "Fail to connect SSH "
                f"{remote_node._connection_info.address}:"
                f"{remote_node._connection_info.port}. "
                f"{identifier.__class__.__name__}: {identifier}. Retry..."
            )
            serial_console.check_initramfs(
                saved_path=log_path, stage="after_trigger_crash", force_run=True
            )
            system_disconnected = True
            continue

        # If there is no exception, then the system is connected
        system_disconnected = False
        # After trigger kdump, the VM will reboot. We need to close the node
        node.close()
        saved_dumpfile_size = 0
        max_retries = 10
        retries = 0

        # Check in this loop until the dump file is generated or incomplete file
        # doesn't grow or timeout
        while True:
            try:
                # The exit code of this command is always 0. Check the stdout
                result = node.execute(
                    f"find {kdump.dump_path} -type f -size +10M "
                    "\\( -name vmcore -o -name dump.* -o -name vmcore.* \\) "
                    "-exec ls -lh {} \\;",
                    shell=True,
                    sudo=True,
                )
                if result.stdout:
                    break
                # Check if has dump incomplete file
                result = node.execute(
                    f"find {kdump.dump_path} -name '*incomplete*'",
                    shell=True,
                    sudo=True,
                )
                if result.stdout:
                    incomplete_file = result.stdout
                    stat = node.tools[Stat]
                    incomplete_file_size = stat.get_total_size(incomplete_file)
            except Exception as identifier:
                log.debug(
                    "Fail to execute command. It may be caused by the system kernel"
                    " reboot after dumping vmcore."
                    f"{identifier.__class__.__name__}: {identifier}. Retry..."
                )
                # Go back to the outer loop and wait for the VM to reconnect.
                system_disconnected = True
                break

            # `result` is now the output of the "*incomplete*" search.
            if result.stdout:
                if incomplete_file_size > saved_dumpfile_size:
                    # Still growing: remember the size and restart the count.
                    saved_dumpfile_size = incomplete_file_size
                    retries = 0
                else:
                    retries = retries + 1
                    if retries >= max_retries:
                        raise LisaException(
                            "The vmcore file is incomplete with file size"
                            f" {round(incomplete_file_size/1024/1024, 2)}MB"
                        )
            else:
                retries = retries + 1
                if retries >= max_retries:
                    raise LisaException(
                        "No vmcore or vmcore-incomplete is found under "
                        f"{kdump.dump_path} with file size greater than 10M."
                    )

            if timer.elapsed(False) > self.timeout_of_dump_crash:
                raise LisaException(
                    "Timeout to dump vmcore file. The size of vmcore-incomplete is"
                    f" {round(incomplete_file_size/1024/1024, 2)}MB"
                )
            time.sleep(5)

    if system_disconnected:
        raise LisaException("Timeout to connect the VM after triggering kdump.")


def _trigger_kdump_on_specified_cpu(
    self, cpu_num: int, node: Node, log_path: Path, log: Logger
) -> None:
    """Set the trigger command for `cpu_num` and run the kdump test.

    Raises:
        SkippedException: if the node's core count is not greater than
            `cpu_num`.
    """
    lscpu = node.tools[Lscpu]
    cpu_count = lscpu.get_core_count()
    if cpu_count <= cpu_num:
        raise SkippedException(
            "The cpu count can't meet the test case's requirement. "
            f"Expected more than {cpu_num} cpus, actual {cpu_count}"
        )
    self.trigger_kdump_cmd = f"taskset -c {cpu_num} echo c > /proc/sysrq-trigger"
    self._kdump_test(node, log_path, log)


# --- Text from the hosting web page (not part of kdumpcrash.py) ---
# Learn to execute automation testing from scratch with LambdaTest Learning Hub.
# Right from setting up the prerequisites to run your first automation test, to
# following best practices and diving deeper into advanced test scenarios.
# LambdaTest Learning Hubs compile a list of step-by-step guides to help you be
# proficient with different test automation frameworks i.e. Selenium, Cypress,
# TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
