Best Python code snippet using lisa_python
infinibandsuit.py
Source:infinibandsuit.py  
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Any, Sequence, Tuple

from assertpy import assert_that

from lisa import (
    Environment,
    Logger,
    Node,
    TestCaseMetadata,
    TestSuite,
    TestSuiteMetadata,
    simple_requirement,
)
from lisa.features import Infiniband, Sriov
from lisa.sut_orchestrator.azure.tools import Waagent
from lisa.tools import Find, KernelConfig, Modprobe, Ssh
from lisa.util import (
    SkippedException,
    UnsupportedDistroException,
    UnsupportedKernelException,
)
from lisa.util.parallel import run_in_parallel


@TestSuiteMetadata(
    area="hpc",
    category="functional",
    description="""
    Tests the functionality of infiniband.
    """,
)
class InfinibandSuit(TestSuite):
    """Functional tests for Infiniband / RDMA and several MPI stacks.

    Two-node cases treat ``environment.nodes[0]`` as the server and
    ``environment.nodes[1]`` as the client.
    """

    @TestCaseMetadata(
        description="""
        This test case will
        1. Determine whether the VM has Infiniband over SR-IOV
        2. Ensure waagent is configures with OS.EnableRDMA=y
        3. Check that appropriate drivers are present
        """,
        priority=2,
        requirement=simple_requirement(
            network_interface=Sriov(), supported_features=[Infiniband]
        ),
    )
    def verify_hpc_over_sriov(self, log: Logger, node: Node) -> None:
        """Check SR-IOV Infiniband, waagent RDMA config and kernel modules."""
        try:
            infiniband = node.features[Infiniband]
        except (UnsupportedDistroException, UnsupportedKernelException) as err:
            # Keep the original exception as the cause of the skip.
            raise SkippedException(err) from err

        assert_that(infiniband.is_over_sriov()).described_as(
            "Based on VM SKU information we expected Infiniband over SR-IOV,"
            " but no matching devices were found."
        ).is_true()

        self._assert_rdma_configured(
            log,
            node,
            [
                "mlx5_ib",
                "ib_uverbs",
                "ib_core",
                "mlx5_core",
                "mlx_compat",
                "rdma_cm",
                "iw_cm",
                "ib_cm",
            ],
        )

    @TestCaseMetadata(
        description="""
        This test case will
        1. Determine whether the VM has Infiniband over Network Direct
        2. Ensure waagent is configures with OS.EnableRDMA=y
        3. Check that appropriate drivers are present
        """,
        priority=2,
        requirement=simple_requirement(supported_features=[Infiniband]),
    )
    def verify_hpc_over_nd(self, log: Logger, node: Node) -> None:
        """Check Network Direct Infiniband, waagent config and kernel modules."""
        try:
            self._check_nd_enabled(node)
        except UnsupportedDistroException as err:
            raise SkippedException(err) from err

        try:
            infiniband = node.features[Infiniband]
        except (UnsupportedDistroException, UnsupportedKernelException) as err:
            raise SkippedException(err) from err

        if not infiniband.is_over_nd():
            raise SkippedException("Inifiniband over ND was not detected.")

        self._assert_rdma_configured(log, node, ["mlx5_ib", "hv_networkdirect"])

    @TestCaseMetadata(
        description="""
        This test case will
        1. Identify the infiniband devices and their cooresponding network interfaces
        2. Run several ping-pong tests to check RDMA / Infiniband functionality
        """,
        priority=1,
        requirement=simple_requirement(
            supported_features=[Infiniband],
            min_count=2,
        ),
    )
    def verify_ping_pong(self, environment: Environment, log: Logger) -> None:
        """Run the ibv_*_pingpong tools between two nodes on each server device."""
        # Constants
        ping_pong_tests = ["ibv_rc_pingpong", "ibv_uc_pingpong", "ibv_ud_pingpong"]

        server_node = environment.nodes[0]
        client_node = environment.nodes[1]

        # Ensure RDMA is setup
        server_infiniband, client_infiniband = self._get_infiniband_pair(
            server_node, client_node
        )

        server_ib_interfaces = server_infiniband.get_ib_interfaces()
        client_ib_interfaces = client_infiniband.get_ib_interfaces()

        # Without interfaces the loop below would run zero times and the case
        # would pass without exercising anything, so fail explicitly instead.
        assert_that(server_ib_interfaces).described_as(
            "No Infiniband interfaces were found on the server node."
        ).is_not_empty()
        assert_that(client_ib_interfaces).described_as(
            "No Infiniband interfaces were found on the client node."
        ).is_not_empty()
        client_ib_device_name = client_ib_interfaces[0].ib_device_name

        for interface in server_ib_interfaces:
            ib_device_name = interface.ib_device_name
            ip_addr = interface.ip_addr

            for test in ping_pong_tests:
                # The server side is started first and waits for the client.
                server_process = server_node.execute_async(
                    f"{test} -g 0 -d {ib_device_name}"
                )
                client_process = client_node.execute_async(
                    f"{test} -g 0 -d {client_ib_device_name} {ip_addr}"
                )
                client_result = client_process.wait_result()
                client_result.assert_exit_code(
                    0,
                    f"Client ping-pong test {test} failed with exit code "
                    f"{client_result.exit_code} and output {client_result.stdout}",
                )
                server_result = server_process.wait_result()
                server_result.assert_exit_code(
                    0,
                    f"Server ping-pong test {test} failed with exit code "
                    f"{server_result.exit_code} and output {server_result.stdout}",
                )

    @TestCaseMetadata(
        description="""
            This test case will
            1. Ensure RDMA is setup
            2. Install Intel MPI
            3. Set up ssh keys of server/client connection
            4. Run MPI pingpong tests
            5. Run other MPI tests
            """,
        priority=4,
        requirement=simple_requirement(
            supported_features=[Infiniband],
            min_count=2,
        ),
    )
    def verify_intel_mpi(self, environment: Environment, log: Logger) -> None:
        """Install Intel MPI on both nodes and run the IMB benchmarks."""
        server_node = environment.nodes[0]
        client_node = environment.nodes[1]

        # Ensure RDMA is setup
        server_ib, client_ib = self._get_infiniband_pair(server_node, client_node)

        run_in_parallel([server_ib.install_intel_mpi, client_ib.install_intel_mpi])

        server_interface, client_interface = self._prepare_mpi_hosts(
            server_node, client_node, server_ib, client_ib
        )
        server_nic_name = server_interface.nic_name
        server_ip = server_interface.ip_addr
        client_ip = client_interface.ip_addr

        mpi_bin = "/opt/intel/oneapi/mpi/2021.1.1/bin"
        mpi_env = (
            "-env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 "
            "-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa "
        )

        # Note: Using bash because script is not supported by Dash
        # sh points to dash on Ubuntu
        for peer_ip, scope in ((server_ip, "intra-node"), (client_ip, "inter-node")):
            server_node.execute(
                f"bash {mpi_bin}/mpirun "
                f"-hosts {server_ip},{peer_ip} -iface {server_nic_name} -ppn 1 -n 2 "
                f"{mpi_env}"
                f"{mpi_bin}/IMB-MPI1 pingpong",
                expected_exit_code=0,
                expected_exit_code_failure_message=f"Failed {scope} pingpong test "
                "with intel mpi",
            )

        tests = ["IMB-MPI1 allreduce", "IMB-RMA", "IMB-NBC"]
        for test in tests:
            server_node.execute(
                f"bash {mpi_bin}/mpirun "
                f"-hosts {server_ip},{client_ip} -iface {server_nic_name} -ppn 22 "
                f"-n 44 {mpi_env}"
                f"{mpi_bin}/{test}",
                expected_exit_code=0,
                expected_exit_code_failure_message=f"Failed {test} test with intel mpi",
                timeout=1200,
            )

    @TestCaseMetadata(
        description="""
            This test case will
            1. Ensure RDMA is setup
            2. Install Open MPI
            3. Set up ssh keys of server/client connection
            4. Run MPI pingpong tests
            5. Run other MPI tests
            """,
        priority=4,
        requirement=simple_requirement(
            supported_features=[Infiniband],
            min_count=2,
        ),
    )
    def verify_open_mpi(self, environment: Environment, log: Logger) -> None:
        """Install Open MPI on both nodes and run IMB-MPI1 intra- and inter-node."""
        server_node = environment.nodes[0]
        client_node = environment.nodes[1]

        # Ensure RDMA is setup
        server_ib, client_ib = self._get_infiniband_pair(server_node, client_node)

        run_in_parallel([server_ib.install_open_mpi, client_ib.install_open_mpi])

        server_node.execute("ldconfig", sudo=True)
        client_node.execute("ldconfig", sudo=True)

        server_interface, client_interface = self._prepare_mpi_hosts(
            server_node, client_node, server_ib, client_ib
        )
        server_ip = server_interface.ip_addr
        client_ip = client_interface.ip_addr

        # Locate the benchmark binary once; both runs below use the same file.
        find = server_node.tools[Find]
        find_results = find.find_files(
            server_node.get_pure_path("/usr"), "IMB-MPI1", sudo=True
        )
        assert_that(len(find_results)).described_as(
            "Could not find location of IMB-MPI1 for Open MPI"
        ).is_greater_than(0)
        test_path = find_results[0]
        assert_that(test_path).described_as(
            "Could not find location of IMB-MPI1 for Open MPI"
        ).is_not_empty()

        mca_options = (
            "-n 2 --mca btl self,vader,openib --mca btl_openib_cq_size 4096 "
            "--mca btl_openib_allow_ib 1 --mca "
            "btl_openib_warn_no_device_params_found 0"
        )

        # Ping Pong test
        server_node.execute(
            f"/usr/local/bin/mpirun --host {server_ip},{server_ip} "
            f"{mca_options} {test_path} pingpong",
            expected_exit_code=0,
            expected_exit_code_failure_message="Failed intra-node ping pong test "
            "with Open MPI",
        )

        # IMB-MPI Tests
        server_node.execute(
            f"/usr/local/bin/mpirun --host {server_ip},{client_ip} "
            f"{mca_options} {test_path}",
            expected_exit_code=0,
            expected_exit_code_failure_message="Failed IMB-MPI1 test with Open MPI",
        )

    @TestCaseMetadata(
        description="""
            This test case will
            1. Ensure RDMA is setup
            2. Install IBM MPI
            3. Set up ssh keys of server/client connection
            4. Run MPI pingpong tests
            """,
        priority=4,
        requirement=simple_requirement(
            supported_features=[Infiniband],
            min_count=2,
        ),
    )
    def verify_ibm_mpi(self, environment: Environment, log: Logger) -> None:
        """Install IBM Platform MPI on both nodes and run its ping_pong sample."""
        server_node = environment.nodes[0]
        client_node = environment.nodes[1]

        # Ensure RDMA is setup
        server_ib, client_ib = self._get_infiniband_pair(server_node, client_node)

        run_in_parallel([server_ib.install_ibm_mpi, client_ib.install_ibm_mpi])

        server_interface, client_interface = self._prepare_mpi_hosts(
            server_node, client_node, server_ib, client_ib
        )
        server_ip = server_interface.ip_addr
        client_ip = client_interface.ip_addr

        for peer_ip, scope in ((server_ip, "intra-node"), (client_ip, "inter-node")):
            server_node.execute(
                "/opt/ibm/platform_mpi/bin/mpirun "
                f"-hostlist {server_ip}:1,{peer_ip}:1 -np 2 -e "
                f"MPI_IB_PKEY={server_ib.get_pkey()} -ibv "
                "/opt/ibm/platform_mpi/help/ping_pong 4096",
                expected_exit_code=0,
                expected_exit_code_failure_message=f"Infiniband {scope} ping pong "
                "test failed with IBM MPI",
            )

    @TestCaseMetadata(
        description="""
            This test case will
            1. Ensure RDMA is setup
            2. Install MVAPICH MPI
            3. Set up ssh keys of server/client connection
            4. Run MPI pingpong tests
            5. Run other MPI tests
            """,
        priority=4,
        requirement=simple_requirement(
            supported_features=[Infiniband],
            min_count=2,
        ),
    )
    def verify_mvapich_mpi(self, environment: Environment, log: Logger) -> None:
        """Install MVAPICH on both nodes and run the IMB benchmarks inter-node."""
        server_node = environment.nodes[0]
        client_node = environment.nodes[1]

        # Ensure RDMA is setup
        server_ib, client_ib = self._get_infiniband_pair(server_node, client_node)

        run_in_parallel([server_ib.install_mvapich_mpi, client_ib.install_mvapich_mpi])

        server_interface, client_interface = self._prepare_mpi_hosts(
            server_node, client_node, server_ib, client_ib
        )
        server_ip = server_interface.ip_addr
        client_ip = client_interface.ip_addr

        # Run MPI tests
        find = server_node.tools[Find]
        test_names = ["IMB-MPI1", "IMB-RMA", "IMB-NBC"]
        for test in test_names:
            find_results = find.find_files(
                server_node.get_pure_path("/usr"), test, sudo=True
            )
            assert_that(len(find_results)).described_as(
                f"Could not find location of MVAPICH MPI test: {test}"
            ).is_greater_than(0)
            test_path = find_results[0]
            assert_that(test_path).described_as(
                f"Could not find location of MVAPICH MPI test: {test}"
            ).is_not_empty()
            server_node.execute(
                f"/usr/local/bin/mpirun --hosts {server_ip},{client_ip} "
                f"-n 2 -ppn 1 {test_path}",
                expected_exit_code=0,
                expected_exit_code_failure_message=f"Failed {test} test "
                "with MVAPICH MPI",
            )

    def _assert_rdma_configured(
        self, log: Logger, node: Node, expected_modules: Sequence[str]
    ) -> None:
        """Assert waagent has RDMA enabled and every expected module is loaded."""
        waagent = node.tools[Waagent]
        assert_that(waagent.is_rdma_enabled()).described_as(
            "Found waagent configuration of OS.EnableRDMA=y "
            "was missing or commented out"
        ).is_true()
        log.debug("Verified waagent config OS.EnableRDMA=y set successfully")

        modprobe = node.tools[Modprobe]
        for module in expected_modules:
            assert_that(modprobe.is_module_loaded(module)).described_as(
                f"Module {module} is not loaded."
            ).is_true()

    def _get_infiniband_pair(
        self, server_node: Node, client_node: Node
    ) -> Tuple[Infiniband, Infiniband]:
        """Initialize Infiniband on both nodes in parallel.

        Returns the ``(server, client)`` Infiniband features. Raises
        SkippedException when the distro or kernel is not supported.
        """
        try:
            run_in_parallel(
                [
                    lambda: client_node.features[Infiniband],
                    lambda: server_node.features[Infiniband],
                ]
            )
        except (UnsupportedDistroException, UnsupportedKernelException) as err:
            raise SkippedException(err) from err

        return server_node.features[Infiniband], client_node.features[Infiniband]

    def _prepare_mpi_hosts(
        self,
        server_node: Node,
        client_node: Node,
        server_ib: Infiniband,
        client_ib: Infiniband,
    ) -> Tuple[Any, Any]:
        """Reconnect both nodes and set up mutual SSH trust over Infiniband.

        Returns the first Infiniband interface of the server and of the client.
        """
        # Restart the ssh sessions for changes to /etc/security/limits.conf
        # to take effect
        server_node.close()
        client_node.close()

        # Get the ip addresses and device name of ib device
        server_interface = server_ib.get_ib_interfaces()[0]
        client_interface = client_ib.get_ib_interfaces()[0]
        server_ip = server_interface.ip_addr
        client_ip = client_interface.ip_addr

        # Test relies on machines being able to ssh into each other
        server_ssh = server_node.tools[Ssh]
        client_ssh = client_node.tools[Ssh]
        server_ssh.enable_public_key(client_ssh.generate_key_pairs())
        client_ssh.enable_public_key(server_ssh.generate_key_pairs())
        server_ssh.add_known_host(client_ip)
        client_ssh.add_known_host(server_ip)

        return server_interface, client_interface

    def _check_nd_enabled(self, node: Node) -> None:
        """Raise UnsupportedDistroException when the kernel lacks Network Direct."""
        # non-SRIOV RDMA VM sizes need hv_network_direct driver to initialize device
        # non-SRIOV RDMA VM sizes will be upgraded to SR-IOV soon
        # recent images remove this module, so skip case in this situation
        if not node.tools[KernelConfig].is_enabled("CONFIG_HYPERV_INFINIBAND_ND"):
            # NOTE(review): the excerpt is truncated right after this message;
            # only the closing parenthesis was restored - confirm against the
            # upstream file.
            raise UnsupportedDistroException(
                node.os, "hv_network_direct module is not enabled"
            )


# ...infiniband.py
Source:infiniband.py  
...335            sudo=True,336            expected_exit_code=0,337            expected_exit_code_failure_message="Failed to install IntelMPI",338        )339    def install_open_mpi(self) -> None:340        node = self._node341        # Install Open MPI342        wget = node.tools[Wget]343        tar_file_path = wget.get(344            "https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.5.tar.gz",345            executable=True,346        )347        tar = node.tools[Tar]348        tar.extract(tar_file_path, ".", gzip=True)349        openmpi_folder = node.get_pure_path("./openmpi-4.0.5")350        node.execute(351            "./configure --enable-mpirun-prefix-by-default",352            shell=True,353            cwd=openmpi_folder,...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!
