How to use the _pandas_obj_to_validate method in pandera

_pandas_obj_to_validate is a module-level helper in pandera's schemas.py. Both DataFrameSchema.validate and SeriesSchemaBase.validate call it to subset the data before running any checks: it collects the first `head` rows, the last `tail` rows, and a random `sample` of rows (seeded by `random_state`), and returns the object unchanged when none of these arguments are given. The excerpt below shows the helper together with its call sites.


Source: schemas.py (from the pandera project on GitHub)
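Before walking through the full listing, it helps to see what the helper computes. Its body at the bottom of the excerpt is truncated, but the docstrings ("Rows overlapping with `tail` or `sample` are de-duplicated") make the intended behavior clear. Here is a minimal standalone sketch of that logic in plain pandas — the function name `subsample` is ours, not pandera's, and the de-duplication step is an assumption based on the documented behavior rather than on the truncated body:

```python
from typing import Optional, Union

import pandas as pd


def subsample(
    obj: Union[pd.DataFrame, pd.Series],
    head: Optional[int] = None,
    tail: Optional[int] = None,
    sample: Optional[int] = None,
    random_state: Optional[int] = None,
) -> Union[pd.DataFrame, pd.Series]:
    """Sketch of what _pandas_obj_to_validate computes (hypothetical name)."""
    parts = []
    if head is not None:
        parts.append(obj.head(head))  # first n rows
    if tail is not None:
        parts.append(obj.tail(tail))  # last n rows
    if sample is not None:
        parts.append(obj.sample(sample, random_state=random_state))
    if not parts:
        # no subsetting requested: validate the whole object
        return obj
    combined = pd.concat(parts)
    # drop rows selected by more than one of head/tail/sample
    # (assumed from the "de-duplicated" wording in the docstrings)
    return combined[~combined.index.duplicated()]
```

The full excerpt from pandera's schemas.py follows.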

```python
# ... (excerpt starts partway through DataFrameSchema.validate) ...
                    col.dtype = self.dtype
                schema_components.append(col)

        if self.index is not None:
            schema_components.append(self.index)

        df_to_validate = _pandas_obj_to_validate(
            check_obj, head, tail, sample, random_state
        )

        check_results = []
        # schema-component-level checks
        for schema_component in schema_components:
            try:
                result = schema_component(
                    df_to_validate,
                    lazy=lazy,
                    # don't make a copy of the data
                    inplace=True,
                )
                check_results.append(check_utils.is_table(result))
            except errors.SchemaError as err:
                error_handler.collect_error("schema_component_check", err)
            except errors.SchemaErrors as err:
                for schema_error_dict in err.schema_errors:
                    error_handler.collect_error(
                        "schema_component_check", schema_error_dict["error"]
                    )

        # dataframe-level checks
        for check_index, check in enumerate(self.checks):
            try:
                check_results.append(
                    _handle_check_results(
                        self, check_index, check, df_to_validate
                    )
                )
            except errors.SchemaError as err:
                error_handler.collect_error("dataframe_check", err)

        if self.unique:
            # NOTE: fix this pylint error
            # pylint: disable=not-an-iterable
            temp_unique: List[List] = (
                [self.unique]
                if all(isinstance(x, str) for x in self.unique)
                else self.unique
            )
            for lst in temp_unique:
                duplicates = df_to_validate.duplicated(subset=lst, keep=False)
                if duplicates.any():
                    # NOTE: this is a hack to support koalas, need to figure
                    # out a workaround to error: "Cannot combine the series or
                    # dataframe because it comes from a different dataframe."
                    if type(duplicates).__module__.startswith(
                        "databricks.koalas"
                    ):
                        # pylint: disable=import-outside-toplevel
                        import databricks.koalas as ks

                        with ks.option_context(
                            "compute.ops_on_diff_frames", True
                        ):
                            failure_cases = df_to_validate.loc[duplicates, lst]
                    else:
                        failure_cases = df_to_validate.loc[duplicates, lst]
                    failure_cases = reshape_failure_cases(failure_cases)
                    error_handler.collect_error(
                        "duplicates",
                        errors.SchemaError(
                            self,
                            check_obj,
                            f"columns '{*lst,}' not unique:\n{failure_cases}",
                            failure_cases=failure_cases,
                            check="multiple_fields_uniqueness",
                        ),
                    )

        if lazy and error_handler.collected_errors:
            raise errors.SchemaErrors(
                error_handler.collected_errors, check_obj
            )

        assert all(check_results), "all check results must be True."
        return check_obj

    def __call__(
        self,
        dataframe: pd.DataFrame,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ):
        """Alias for :func:`DataFrameSchema.validate` method.

        :param pd.DataFrame dataframe: the dataframe to be validated.
        :param head: validate the first n rows. Rows overlapping with `tail` or
            `sample` are de-duplicated.
        :type head: int
        :param tail: validate the last n rows. Rows overlapping with `head` or
            `sample` are de-duplicated.
        :type tail: int
        :param sample: validate a random sample of n rows. Rows overlapping
            with `head` or `tail` are de-duplicated.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates dataframe against all validation
            checks and raises a ``SchemaErrors``. Otherwise, raise
            ``SchemaError`` as soon as one occurs.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        """
        return self.validate(
            dataframe, head, tail, sample, random_state, lazy, inplace
        )

    def __repr__(self) -> str:
        """Represent string for logging."""
        return (
            f"<Schema {self.__class__.__name__}("
            f"columns={self.columns}, "
            f"checks={self.checks}, "
            f"index={self.index.__repr__()}, "
            f"coerce={self.coerce}, "
            f"dtype={self._dtype},"
            f"strict={self.strict},"
            f"name={self.name},"
            f"ordered={self.ordered}"
            ")>"
        )

    def __str__(self) -> str:
        """Represent string for user inspection."""

        def _format_multiline(json_str, arg):
            return "\n".join(
                f"{indent}{line}" if i != 0 else f"{indent}{arg}={line}"
                for i, line in enumerate(json_str.split("\n"))
            )

        indent = " " * N_INDENT_SPACES
        if self.columns:
            columns_str = f"{indent}columns={{\n"
            for colname, col in self.columns.items():
                columns_str += f"{indent * 2}'{colname}': {col}\n"
            columns_str += f"{indent}}}"
        else:
            columns_str = f"{indent}columns={{}}"

        if self.checks:
            checks_str = f"{indent}checks=[\n"
            for check in self.checks:
                checks_str += f"{indent * 2}{check}\n"
            checks_str += f"{indent}]"
        else:
            checks_str = f"{indent}checks=[]"

        # add additional indents
        index_ = str(self.index).split("\n")
        if len(index_) == 1:
            index = str(self.index)
        else:
            index = "\n".join(
                x if i == 0 else f"{indent}{x}" for i, x in enumerate(index_)
            )

        return (
            f"<Schema {self.__class__.__name__}(\n"
            f"{columns_str},\n"
            f"{checks_str},\n"
            f"{indent}coerce={self.coerce},\n"
            f"{indent}dtype={self._dtype},\n"
            f"{indent}index={index},\n"
            f"{indent}strict={self.strict}\n"
            f"{indent}name={self.name},\n"
            f"{indent}ordered={self.ordered}\n"
            ")>"
        )

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, type(self)):
            return NotImplemented

        def _compare_dict(obj):
            return {
                k: v for k, v in obj.__dict__.items() if k != "_IS_INFERRED"
            }

        return _compare_dict(self) == _compare_dict(other)

    @st.strategy_import_error
    def strategy(
        self, *, size: Optional[int] = None, n_regex_columns: int = 1
    ):
        """Create a ``hypothesis`` strategy for generating a DataFrame.

        :param size: number of elements to generate
        :param n_regex_columns: number of regex columns to generate.
        :returns: a strategy that generates pandas DataFrame objects.
        """
        return st.dataframe_strategy(
            self.dtype,
            columns=self.columns,
            checks=self.checks,
            unique=self.unique,
            index=self.index,
            size=size,
            n_regex_columns=n_regex_columns,
        )

    def example(
        self, size: Optional[int] = None, n_regex_columns: int = 1
    ) -> pd.DataFrame:
        """Generate an example of a particular size.

        :param size: number of elements in the generated DataFrame.
        :returns: pandas DataFrame object.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import,import-error
        import hypothesis

        with warnings.catch_warnings():
            warnings.simplefilter(
                "ignore",
                category=hypothesis.errors.NonInteractiveExampleWarning,
            )
            return self.strategy(
                size=size, n_regex_columns=n_regex_columns
            ).example()

    @_inferred_schema_guard
    def add_columns(
        self, extra_schema_cols: Dict[str, Any]
    ) -> "DataFrameSchema":
        """Create a copy of the :class:`DataFrameSchema` with extra columns.

        :param extra_schema_cols: Additional columns of the format
        :type extra_schema_cols: DataFrameSchema
        :returns: a new :class:`DataFrameSchema` with the extra_schema_cols
            added.

        :example:

        To add columns to the schema, pass a dictionary with column name and
        ``Column`` instance key-value pairs.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema(
        ...     {
        ...         "category": pa.Column(str),
        ...         "probability": pa.Column(float),
        ...     }
        ... )
        >>> print(
        ...     example_schema.add_columns({"even_number": pa.Column(pa.Bool)})
        ... )
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(str))>
                'probability': <Schema Column(name=probability, type=DataType(float64))>
                'even_number': <Schema Column(name=even_number, type=DataType(bool))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`remove_columns`
        """
        schema_copy = copy.deepcopy(self)
        schema_copy.columns = {
            **schema_copy.columns,
            **DataFrameSchema(extra_schema_cols).columns,
        }
        return schema_copy

    @_inferred_schema_guard
    def remove_columns(self, cols_to_remove: List[str]) -> "DataFrameSchema":
        """Removes columns from a :class:`DataFrameSchema` and returns a new
        copy.

        :param cols_to_remove: Columns to be removed from the
            ``DataFrameSchema``
        :type cols_to_remove: List
        :returns: a new :class:`DataFrameSchema` without the cols_to_remove
        :raises: :class:`~pandera.errors.SchemaInitError`: if column not in
            schema.

        :example:

        To remove a column or set of columns from a schema, pass a list of
        columns to be removed:

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema(
        ...     {
        ...         "category" : pa.Column(str),
        ...         "probability": pa.Column(float)
        ...     }
        ... )
        >>>
        >>> print(example_schema.remove_columns(["category"]))
        <Schema DataFrameSchema(
            columns={
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`add_columns`
        """
        schema_copy = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in cols_to_remove if x not in schema_copy.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        for col in cols_to_remove:
            schema_copy.columns.pop(col)

        return schema_copy

    @_inferred_schema_guard
    def update_column(self, column_name: str, **kwargs) -> "DataFrameSchema":
        """Create copy of a :class:`DataFrameSchema` with updated column
        properties.

        :param column_name:
        :param kwargs: key-word arguments supplied to
            :class:`~pandera.schema_components.Column`
        :returns: a new :class:`DataFrameSchema` with updated column
        :raises: :class:`~pandera.errors.SchemaInitError`: if column not in
            schema or you try to change the name.

        :example:

        Calling ``schema.update_column`` returns the :class:`DataFrameSchema`
        with the updated column.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>> print(
        ...     example_schema.update_column(
        ...         'category', dtype=pa.Category
        ...     )
        ... )
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(category))>
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`rename_columns`
        """
        # check that columns exist in schema
        if "name" in kwargs:
            raise ValueError("cannot update 'name' of the column.")
        if column_name not in self.columns:
            raise ValueError(f"column '{column_name}' not in {self}")
        schema_copy = copy.deepcopy(self)
        column_copy = copy.deepcopy(self.columns[column_name])
        new_column = column_copy.__class__(
            **{**column_copy.properties, **kwargs}
        )
        schema_copy.columns.update({column_name: new_column})
        return schema_copy

    def update_columns(
        self, update_dict: Dict[str, Dict[str, Any]]
    ) -> "DataFrameSchema":
        """
        Create copy of a :class:`DataFrameSchema` with updated column
        properties.

        :param update_dict:
        :return: a new :class:`DataFrameSchema` with updated columns
        :raises: :class:`~pandera.errors.SchemaInitError`: if column not in
            schema or you try to change the name.

        :example:

        Calling ``schema.update_columns`` returns the :class:`DataFrameSchema`
        with the updated columns.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>>
        >>> print(
        ...     example_schema.update_columns(
        ...         {"category": {"dtype":pa.Category}}
        ...     )
        ... )
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(category))>
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. note:: This is the successor to the ``update_column`` method, which
            will be deprecated.
        """
        new_schema = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in update_dict.keys() if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        new_columns: Dict[str, Column] = {}
        for col in new_schema.columns:
            # check
            if update_dict.get(col):
                if update_dict[col].get("name"):
                    raise errors.SchemaInitError(
                        "cannot update 'name' \
                        property of the column."
                    )
            original_properties = new_schema.columns[col].properties
            if update_dict.get(col):
                new_properties = copy.deepcopy(original_properties)
                new_properties.update(update_dict[col])
                new_columns[col] = new_schema.columns[col].__class__(
                    **new_properties
                )
            else:
                new_columns[col] = new_schema.columns[col].__class__(
                    **original_properties
                )

        new_schema.columns = new_columns
        return new_schema

    def rename_columns(self, rename_dict: Dict[str, str]) -> "DataFrameSchema":
        """Rename columns using a dictionary of key-value pairs.

        :param rename_dict: dictionary of 'old_name': 'new_name' key-value
            pairs.
        :returns: :class:`DataFrameSchema` (copy of original)
        :raises: :class:`~pandera.errors.SchemaInitError` if column not in the
            schema.

        :example:

        To rename a column or set of columns, pass a dictionary of old column
        names and new column names, similar to the pandas DataFrame method.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>>
        >>> print(
        ...     example_schema.rename_columns({
        ...         "category": "categories",
        ...         "probability": "probabilities"
        ...     })
        ... )
        <Schema DataFrameSchema(
            columns={
                'categories': <Schema Column(name=categories, type=DataType(str))>
                'probabilities': <Schema Column(name=probabilities, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`update_column`
        """
        new_schema = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in rename_dict.keys() if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        # ensure all new keys are not present in the current column names
        already_in_columns: List[str] = [
            x for x in rename_dict.values() if x in new_schema.columns.keys()
        ]
        if already_in_columns:
            raise errors.SchemaInitError(
                f"Keys {already_in_columns} already found in schema columns!"
            )

        # We iterate over the existing columns dict and replace those keys
        # that exist in the rename_dict
        new_columns = {
            (rename_dict[col_name] if col_name in rename_dict else col_name): (
                col_attrs.set_name(rename_dict[col_name])
                if col_name in rename_dict
                else col_attrs
            )
            for col_name, col_attrs in new_schema.columns.items()
        }

        new_schema.columns = new_columns
        return new_schema

    def select_columns(self, columns: List[Any]) -> "DataFrameSchema":
        """Select subset of columns in the schema.

        *New in version 0.4.5*

        :param columns: list of column names to select.
        :returns: :class:`DataFrameSchema` (copy of original) with only
            the selected columns.
        :raises: :class:`~pandera.errors.SchemaInitError` if column not in the
            schema.

        :example:

        To subset a schema by column, and return a new schema:

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>>
        >>> print(example_schema.select_columns(['category']))
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(str))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. note:: If an index is present in the schema, it will also be
            included in the new schema.
        """
        new_schema = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in columns if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        new_columns = {
            col_name: column
            for col_name, column in self.columns.items()
            if col_name in columns
        }
        new_schema.columns = new_columns
        return new_schema

    def to_script(self, fp: Union[str, Path] = None) -> "DataFrameSchema":
        """Write DataFrameSchema to python script.

        :param fp: str, Path to write script
        :returns: dataframe schema.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        import pandera.io

        return pandera.io.to_script(self, fp)

    @classmethod
    def from_yaml(cls, yaml_schema) -> "DataFrameSchema":
        """Create DataFrameSchema from yaml file.

        :param yaml_schema: str, Path to yaml schema, or serialized yaml
            string.
        :returns: dataframe schema.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        import pandera.io

        return pandera.io.from_yaml(yaml_schema)

    def to_yaml(self, stream: Optional[os.PathLike] = None):
        """Write DataFrameSchema to yaml file.

        :param stream: file stream to write to. If None, dumps to string.
        :returns: yaml string if stream is None, otherwise returns None.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        import pandera.io

        return pandera.io.to_yaml(self, stream=stream)

    def set_index(
        self, keys: List[str], drop: bool = True, append: bool = False
    ) -> "DataFrameSchema":
        """
        A method for setting the :class:`Index` of a :class:`DataFrameSchema`,
        via an existing :class:`Column` or list of columns.

        :param keys: list of labels
        :param drop: bool, default True
        :param append: bool, default False
        :return: a new :class:`DataFrameSchema` with specified column(s) in the
            index.
        :raises: :class:`~pandera.errors.SchemaInitError` if column not in the
            schema.

        :examples:

        Just as you would set the index in a ``pandas`` DataFrame from an
        existing column, you can set an index within the schema from an
        existing column in the schema.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)})
        >>>
        >>> print(example_schema.set_index(['category']))
        <Schema DataFrameSchema(
            columns={
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=<Schema Index(name=category, type=DataType(str))>,
            strict=False
            name=None,
            ordered=False
        )>

        If you have an existing index in your schema, and you would like to
        append a new column as an index to it (yielding a :class:`MultiIndex`),
        just use set_index as you would in pandas.

        >>> example_schema = pa.DataFrameSchema(
        ...     {
        ...         "column1": pa.Column(str),
        ...         "column2": pa.Column(int)
        ...     },
        ...     index=pa.Index(name = "column3", dtype = int)
        ... )
        >>>
        >>> print(example_schema.set_index(["column2"], append = True))
        <Schema DataFrameSchema(
            columns={
                'column1': <Schema Column(name=column1, type=DataType(str))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=<Schema MultiIndex(
                indexes=[
                    <Schema Index(name=column3, type=DataType(int64))>
                    <Schema Index(name=column2, type=DataType(int64))>
                ]
                coerce=False,
                strict=False,
                name=None,
                ordered=True
            )>,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`reset_index`
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        from pandera.schema_components import Index, MultiIndex

        new_schema = copy.deepcopy(self)

        keys_temp: List = (
            list(set(keys)) if not isinstance(keys, list) else keys
        )

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in keys_temp if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        # if there is already an index, append or replace according to
        # parameters
        ind_list: List = (
            []
            if new_schema.index is None or not append
            else list(new_schema.index.indexes)
            if isinstance(new_schema.index, MultiIndex) and append
            else [new_schema.index]
        )

        for col in keys_temp:
            ind_list.append(
                Index(
                    dtype=new_schema.columns[col].dtype,
                    name=col,
                    checks=new_schema.columns[col].checks,
                    nullable=new_schema.columns[col].nullable,
                    unique=new_schema.columns[col].unique,
                    coerce=new_schema.columns[col].coerce,
                )
            )

        new_schema.index = (
            ind_list[0] if len(ind_list) == 1 else MultiIndex(ind_list)
        )

        # if drop is True as defaulted, drop the columns moved into the index
        if drop:
            new_schema = new_schema.remove_columns(keys_temp)

        return new_schema

    def reset_index(
        self, level: List[str] = None, drop: bool = False
    ) -> "DataFrameSchema":
        """
        A method for resetting the :class:`Index` of a :class:`DataFrameSchema`

        :param level: list of labels
        :param drop: bool, default False
        :return: a new :class:`DataFrameSchema` with specified column(s) in the
            index.
        :raises: :class:`~pandera.errors.SchemaInitError` if no index set in
            schema.

        :examples:

        Similar to the ``pandas`` reset_index method on a pandas DataFrame,
        this method can be used to fully or partially reset indices of a
        schema.

        To remove the entire index from the schema, just call the reset_index
        method with default parameters.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema(
        ...     {"probability" : pa.Column(float)},
        ...     index = pa.Index(name="unique_id", dtype=int)
        ... )
        >>>
        >>> print(example_schema.reset_index())
        <Schema DataFrameSchema(
            columns={
                'probability': <Schema Column(name=probability, type=DataType(float64))>
                'unique_id': <Schema Column(name=unique_id, type=DataType(int64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        This reclassifies an index (or indices) as a column (or columns).

        Similarly, to partially alter the index, pass the name of the column
        you would like to be removed to the ``level`` parameter, and you may
        also decide whether to drop the levels with the ``drop`` parameter.

        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str)},
        ...     index = pa.MultiIndex([
        ...         pa.Index(name="unique_id1", dtype=int),
        ...         pa.Index(name="unique_id2", dtype=str)
        ...         ]
        ...     )
        ... )
        >>> print(example_schema.reset_index(level = ["unique_id1"]))
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(str))>
                'unique_id1': <Schema Column(name=unique_id1, type=DataType(int64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=<Schema Index(name=unique_id2, type=DataType(str))>,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`set_index`
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        from pandera.schema_components import Column, Index, MultiIndex

        new_schema = copy.deepcopy(self)

        if new_schema.index is None:
            raise errors.SchemaInitError(
                "There is currently no index set for this schema."
            )

        # ensure no duplicates
        level_temp: Union[List[Any], List[str]] = (
            list(set(level)) if level is not None else []
        )

        # ensure all specified keys are present in the index
        level_not_in_index: Union[List[Any], List[str], None] = (
            [x for x in level_temp if x not in new_schema.index.names]
            if isinstance(new_schema.index, MultiIndex) and level_temp
            else []
            if isinstance(new_schema.index, Index)
            and (level_temp == [new_schema.index.name])
            else level_temp
        )
        if level_not_in_index:
            raise errors.SchemaInitError(
                f"Keys {level_not_in_index} not found in schema columns!"
            )

        new_index = (
            None
            if not level_temp or isinstance(new_schema.index, Index)
            else new_schema.index.remove_columns(level_temp)
        )
        new_index = (
            new_index
            if new_index is None
            else Index(
                dtype=new_index.columns[list(new_index.columns)[0]].dtype,
                checks=new_index.columns[list(new_index.columns)[0]].checks,
                nullable=new_index.columns[
                    list(new_index.columns)[0]
                ].nullable,
                unique=new_index.columns[list(new_index.columns)[0]].unique,
                coerce=new_index.columns[list(new_index.columns)[0]].coerce,
                name=new_index.columns[list(new_index.columns)[0]].name,
            )
            if (len(list(new_index.columns)) == 1) and (new_index is not None)
            else None
            if (len(list(new_index.columns)) == 0) and (new_index is not None)
            else new_index
        )

        if not drop:
            additional_columns: Dict[str, Any] = (
                {col: new_schema.index.columns.get(col) for col in level_temp}
                if isinstance(new_schema.index, MultiIndex)
                else {new_schema.index.name: new_schema.index}
            )
            new_schema = new_schema.add_columns(
                {
                    k: Column(
                        dtype=v.dtype,
                        checks=v.checks,
                        nullable=v.nullable,
                        unique=v.unique,
                        coerce=v.coerce,
                        name=v.name,
                    )
                    for (k, v) in additional_columns.items()
                }
            )

        new_schema.index = new_index

        return new_schema

    @classmethod
    def __get_validators__(cls):
        yield cls._pydantic_validate

    @classmethod
    def _pydantic_validate(cls, schema: Any) -> "DataFrameSchema":
        """Verify that the input is a compatible DataFrameSchema."""
        if not isinstance(schema, cls):  # type: ignore
            raise TypeError(f"{schema} is not a {cls}.")
        return cast("DataFrameSchema", schema)


class SeriesSchemaBase:
    """Base series validator object."""

    @deprecate_pandas_dtype
    def __init__(
        self,
        dtype: PandasDtypeInputTypes = None,
        checks: CheckList = None,
        nullable: bool = False,
        unique: bool = False,
        allow_duplicates: Optional[bool] = None,
        coerce: bool = False,
        name: Any = None,
        pandas_dtype: PandasDtypeInputTypes = None,
        title: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """Initialize series schema base object.

        :param dtype: datatype of the column. If a string is specified,
            then assumes one of the valid pandas string values:
            http://pandas.pydata.org/pandas-docs/stable/basics.html#dtypes
        :param checks: If element_wise is True, then callable signature should
            be: ``Callable[Any, bool]`` where the ``Any`` input is a scalar
            element in the column. Otherwise, the input is assumed to be a
            pandas.Series object.
        :param nullable: Whether or not column can contain null values.
        :param unique: Whether or not column can contain duplicate
            values.
        :param allow_duplicates: Whether or not column can contain duplicate
            values.

            .. warning::
                This option will be deprecated in 0.8.0. Use the ``unique``
                argument instead.

        :param coerce: If True, when schema.validate is called the column will
            be coerced into the specified dtype. This has no effect on columns
            where ``dtype=None``.
        :param name: column name in dataframe to validate.
        :param pandas_dtype: alias of ``dtype`` for backwards compatibility.

            .. warning:: This option will be deprecated in 0.8.0

        :param title: A human-readable label for the series.
        :param description: An arbitrary textual description of the series.
        :type nullable: bool
        """
        if checks is None:
            checks = []
        if isinstance(checks, (Check, Hypothesis)):
            checks = [checks]

        if allow_duplicates is not None:
            warnings.warn(
                "The `allow_duplicates` will be deprecated in "
                "favor of the `unique` keyword. The value of "
                "`unique` will be set to the opposite of "
                "the `allow_duplicates` keyword.",
                DeprecationWarning,
            )
            unique = not allow_duplicates

        self.dtype = dtype or pandas_dtype  # type: ignore
        self._nullable = nullable
        self._coerce = coerce
        self._checks = checks
        self._name = name
        self._unique = unique
        self._title = title
        self._description = description

        for check in self.checks:
            if check.groupby is not None and not self._allow_groupby:
                raise errors.SchemaInitError(
                    f"Cannot use groupby checks with type {type(self)}"
                )

        # make sure pandas dtype is valid
        self.dtype  # pylint: disable=pointless-statement

        # this attribute is not meant to be accessed by users and is explicitly
        # set to True in the case that a schema is created by infer_schema.
        self._IS_INFERRED = False

    # the _is_inferred getter and setter methods are not public
    @property
    def _is_inferred(self):
        return self._IS_INFERRED

    @_is_inferred.setter
    def _is_inferred(self, value: bool):
        self._IS_INFERRED = value

    @property
    def checks(self):
        """Return list of checks or hypotheses."""
        return self._checks

    @checks.setter
    def checks(self, checks):
        self._checks = checks

    @_inferred_schema_guard
    def set_checks(self, checks: CheckList):
        """Create a new SeriesSchema with a new set of Checks

        :param checks: checks to set on the new schema
        :returns: a new SeriesSchema with a new set of checks
        """
        schema_copy = copy.deepcopy(self)
        schema_copy.checks = checks
        return schema_copy

    @property
    def nullable(self) -> bool:
        """Whether the series is nullable."""
        return self._nullable

    @property
    def unique(self) -> bool:
        """Whether to check for duplicates in check object"""
        return self._unique

    @unique.setter
    def unique(self, value: bool) -> None:
        """Set unique attribute"""
        self._unique = value

    @property
    def allow_duplicates(self) -> bool:
        """Whether to allow duplicate values."""
        return not self._unique

    @allow_duplicates.setter
    def allow_duplicates(self, value: bool) -> None:
        """Set allow_duplicates attribute."""
        self._unique = not value

    @property
    def coerce(self) -> bool:
        """Whether to coerce series to specified type."""
        return self._coerce

    @coerce.setter
    def coerce(self, value: bool) -> None:
        """Set coerce attribute."""
        self._coerce = value

    @property
    def name(self) -> Union[str, None]:
        """Get SeriesSchema name."""
        return self._name

    @property
    def title(self):
        """A human-readable label for the series."""
        return self._title

    @property
    def description(self):
        """An arbitrary textual description of the series."""
        return self._description

    @property
    def dtype(
        self,
    ) -> DataType:
        """Get the pandas dtype"""
        return self._dtype  # type: ignore

    @dtype.setter
    def dtype(self, value: PandasDtypeInputTypes) -> None:
        """Set the pandas dtype"""
        self._dtype = pandas_engine.Engine.dtype(value) if value else None

    def coerce_dtype(self, obj: Union[pd.Series, pd.Index]) -> pd.Series:
        """Coerce type of a pd.Series by type specified in dtype.

        :param pd.Series series: One-dimensional ndarray with axis labels
            (including time series).
        :returns: ``Series`` with coerced data type
        """
        if self.dtype is None:
            return obj

        try:
            return self.dtype.try_coerce(obj)
        except errors.ParserError as exc:
            msg = (
                f"Error while coercing '{self.name}' to type "
                f"{self.dtype}: {exc}:\n{exc.failure_cases}"
            )
            raise errors.SchemaError(
                self,
                obj,
                msg,
                failure_cases=exc.failure_cases,
                check=f"coerce_dtype('{self.dtype}')",
            ) from exc

    @property
    def _allow_groupby(self):
        """Whether the schema or schema component allows groupby operations."""
        raise NotImplementedError(  # pragma: no cover
            "The _allow_groupby property must be implemented by subclasses "
            "of SeriesSchemaBase"
        )

    def validate(
        self,
        check_obj: Union[pd.DataFrame, pd.Series],
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> Union[pd.DataFrame, pd.Series]:
        # pylint: disable=too-many-locals,too-many-branches,too-many-statements
        """Validate a series or specific column in dataframe.

        :param check_obj: pandas DataFrame or Series to validate.
        :param head: validate the first n rows. Rows overlapping with `tail` or
            `sample` are de-duplicated.
        :param tail: validate the last n rows. Rows overlapping with `head` or
            `sample` are de-duplicated.
        :param sample: validate a random sample of n rows. Rows overlapping
            with `head` or `tail` are de-duplicated.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates dataframe against all validation
            checks and raises a ``SchemaErrors``. Otherwise, raise
            ``SchemaError`` as soon as one occurs.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        :returns: validated DataFrame or Series.
        """
        if self._is_inferred:
            warnings.warn(
                f"This {type(self)} is an inferred schema that hasn't been "
                "modified. It's recommended that you refine the schema "
                "by calling `set_checks` before using it to validate data.",
                UserWarning,
            )

        error_handler = SchemaErrorHandler(lazy)

        if not inplace:
            check_obj = check_obj.copy()

        series = (
            check_obj
            if check_utils.is_field(check_obj)
            else check_obj[self.name]
        )

        series = _pandas_obj_to_validate(
            series, head, tail, sample, random_state
        )

        check_obj = _pandas_obj_to_validate(
            check_obj, head, tail, sample, random_state
        )

        if self.name is not None and series.name != self._name:
            msg = (
                f"Expected {type(self)} to have name '{self._name}', found "
                f"'{series.name}'"
            )
            error_handler.collect_error(
                "wrong_field_name",
                errors.SchemaError(
                    self,
                    check_obj,
                    msg,
                    failure_cases=scalar_failure_case(series.name),
                    check=f"field_name('{self._name}')",
                ),
            )

        if not self._nullable:
            nulls = series.isna()
            if nulls.sum() > 0:
                failed = series[nulls]
                msg = (
                    f"non-nullable series '{series.name}' contains null "
                    f"values:\n{failed}"
                )
                error_handler.collect_error(
                    "series_contains_nulls",
                    errors.SchemaError(
                        self,
                        check_obj,
                        msg,
                        failure_cases=reshape_failure_cases(
                            series[nulls], ignore_na=False
                        ),
                        check="not_nullable",
                    ),
                )

        # Check if the series contains duplicate values
        if self._unique:
            if type(series).__module__.startswith("databricks.koalas"):
                duplicates = (
                    series.to_frame().duplicated().reindex(series.index)
                )
                # pylint: disable=import-outside-toplevel
                import databricks.koalas as ks

                with ks.option_context("compute.ops_on_diff_frames", True):
                    failed = series[duplicates]
            else:
                duplicates = series.duplicated()
                failed = series[duplicates]

            if duplicates.any():
                msg = (
                    f"series '{series.name}' contains duplicate values:\n"
                    f"{failed}"
                )
                error_handler.collect_error(
                    "series_contains_duplicates",
                    errors.SchemaError(
                        self,
                        check_obj,
                        msg,
                        failure_cases=reshape_failure_cases(failed),
                        check="field_uniqueness",
                    ),
                )

        if self._dtype is not None and (
            not self._dtype.check(pandas_engine.Engine.dtype(series.dtype))
        ):
            msg = (
                f"expected series '{series.name}' to have type {self._dtype}, "
                + f"got {series.dtype}"
            )
            error_handler.collect_error(
                "wrong_dtype",
                errors.SchemaError(
                    self,
                    check_obj,
                    msg,
                    failure_cases=scalar_failure_case(str(series.dtype)),
                    check=f"dtype('{self.dtype}')",
                ),
            )

        check_results = []
        if check_utils.is_field(check_obj):
            check_obj, check_args = series, [None]
        else:
            check_args = [self.name]  # type: ignore

        for check_index, check in enumerate(self.checks):
            try:
                check_results.append(
                    _handle_check_results(
                        self, check_index, check, check_obj, *check_args
                    )
                )
            except errors.SchemaError as err:
                error_handler.collect_error("dataframe_check", err)
            except Exception as err:  # pylint: disable=broad-except
                # catch other exceptions that may occur when executing the
                # Check
                err_msg = f'"{err.args[0]}"' if len(err.args) > 0 else ""
                err_str = f"{err.__class__.__name__}({ err_msg})"
                msg = (
                    f"Error while executing check function: {err_str}\n"
                    + traceback.format_exc()
                )
                error_handler.collect_error(
                    "check_error",
                    errors.SchemaError(
                        self,
                        check_obj,
                        msg,
                        failure_cases=scalar_failure_case(err_str),
                        check=check,
                        check_index=check_index,
                    ),
                    original_exc=err,
                )

        if lazy and error_handler.collected_errors:
            raise errors.SchemaErrors(
                error_handler.collected_errors, check_obj
            )

        assert all(check_results)
        return check_obj

    def __call__(
        self,
        check_obj: Union[pd.DataFrame, pd.Series],
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> Union[pd.DataFrame, pd.Series]:
        """Alias for ``validate`` method."""
        return self.validate(
            check_obj, head, tail, sample, random_state, lazy, inplace
        )

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    @st.strategy_import_error
    def strategy(self, *, size=None):
        """Create a ``hypothesis`` strategy for generating a Series.

        :param size: number of elements to generate
        :returns: a strategy that generates pandas Series objects.
        """
        return st.series_strategy(
            self.dtype,
            checks=self.checks,
            nullable=self.nullable,
            unique=self.unique,
            name=self.name,
            size=size,
        )

    def example(self, size=None) -> pd.Series:
        """Generate an example of a particular size.

        :param size: number of elements in the generated Series.
        :returns: pandas Series object.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import,import-error
        import hypothesis

        with warnings.catch_warnings():
            warnings.simplefilter(
                "ignore",
                category=hypothesis.errors.NonInteractiveExampleWarning,
            )
            return self.strategy(size=size).example()

    def __repr__(self):
        return (
            f"<Schema {self.__class__.__name__}"
            f"(name={self._name}, type={self.dtype!r})>"
        )

    @classmethod
    def __get_validators__(cls):
        yield cls._pydantic_validate

    @classmethod
    def _pydantic_validate(  # type: ignore
        cls: TSeriesSchemaBase, schema: Any
    ) -> TSeriesSchemaBase:
        """Verify that the input is a compatible DataFrameSchema."""
        if not isinstance(schema, cls):  # type: ignore
            raise TypeError(f"{schema} is not a {cls}.")
        return cast(TSeriesSchemaBase, schema)


class SeriesSchema(SeriesSchemaBase):
    """Series validator."""

    @deprecate_pandas_dtype
    def __init__(
        self,
        dtype: PandasDtypeInputTypes = None,
        checks: CheckList = None,
        index=None,
        nullable: bool = False,
        unique: bool = False,
        allow_duplicates: Optional[bool] = None,
        coerce: bool = False,
        name: str = None,
        pandas_dtype: PandasDtypeInputTypes = None,
        title: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """Initialize series schema base object.

        :param dtype: datatype of the column. If a string is specified,
            then assumes one of the valid pandas string values:
            http://pandas.pydata.org/pandas-docs/stable/basics.html#dtypes
        :param checks: If element_wise is True, then callable signature should
            be: ``Callable[Any, bool]`` where the ``Any`` input is a scalar
            element in the column. Otherwise, the input is assumed to be a
            pandas.Series object.
        :param index: specify the datatypes and properties of the index.
        :param nullable: Whether or not column can contain null values.
        :param unique: Whether or not column can contain duplicate
            values.
        :param allow_duplicates: Whether or not column can contain duplicate
            values.

            .. warning::
                This option will be deprecated in 0.8.0. Use the ``unique``
                argument instead.

        :param coerce: If True, when schema.validate is called the column will
            be coerced into the specified dtype. This has no effect on columns
            where ``pandas_dtype=None``.
        :param name: series name.
        :param pandas_dtype: alias of ``dtype`` for backwards compatibility.

            .. warning:: This option will be deprecated in 0.8.0

        :param title: A human-readable label for the series.
        :param description: An arbitrary textual description of the series.
        """
        super().__init__(
            dtype,
            checks,
            nullable,
            unique,
            allow_duplicates,
            coerce,
            name,
            pandas_dtype,
            title,
            description,
        )
        self.index = index

    @property
    def _allow_groupby(self) -> bool:
        """Whether the schema or schema component allows groupby operations."""
        return False

    def validate(
        self,
        check_obj: pd.Series,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> pd.Series:
        """Validate a Series object.

        :param check_obj: One-dimensional ndarray with axis labels
            (including time series).
        :param head: validate the first n rows. Rows overlapping with `tail` or
            `sample` are de-duplicated.
        :param tail: validate the last n rows. Rows overlapping with `head` or
            `sample` are de-duplicated.
        :param sample: validate a random sample of n rows. Rows overlapping
            with `head` or `tail` are de-duplicated.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates dataframe against all validation
            checks and raises a ``SchemaErrors``. Otherwise, raise
            ``SchemaError`` as soon as one occurs.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        :returns: validated Series.
        :raises SchemaError: when ``DataFrame`` violates built-in or custom
            checks.

        :example:

        >>> import pandas as pd
        >>> import pandera as pa
        >>>
        >>> series_schema = pa.SeriesSchema(
        ...     float, [
        ...         pa.Check(lambda s: s > 0),
        ...         pa.Check(lambda s: s < 1000),
        ...         pa.Check(lambda s: s.mean() > 300),
        ...     ])
        >>> series = pd.Series([1, 100, 800, 900, 999], dtype=float)
        >>> print(series_schema.validate(series))
        0      1.0
        1    100.0
        2    800.0
        3    900.0
        4    999.0
        dtype: float64
        """
        if not check_utils.is_field(check_obj):
            raise TypeError(f"expected pd.Series, got {type(check_obj)}")

        if hasattr(check_obj, "dask"):
            # special case for dask series
            if inplace:
                check_obj = check_obj.pandera.add_schema(self)
            else:
                check_obj = check_obj.copy()

            check_obj = check_obj.map_partitions(
                self._validate,
                head=head,
                tail=tail,
                sample=sample,
                random_state=random_state,
                lazy=lazy,
                inplace=inplace,
                meta=check_obj,
            )
            return check_obj.pandera.add_schema(self)

        return self._validate(
            check_obj=check_obj,
            head=head,
            tail=tail,
            sample=sample,
            random_state=random_state,
            lazy=lazy,
            inplace=inplace,
        )

    def _validate(
        self,
        check_obj: pd.Series,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> pd.Series:
        if not inplace:
            check_obj = check_obj.copy()

        if hasattr(check_obj, "pandera"):
            check_obj = check_obj.pandera.add_schema(self)

        error_handler = SchemaErrorHandler(lazy=lazy)

        if self.coerce:
            try:
                check_obj = self.coerce_dtype(check_obj)
                if hasattr(check_obj, "pandera"):
                    check_obj = check_obj.pandera.add_schema(self)
            except errors.SchemaError as exc:
                error_handler.collect_error("dtype_coercion_error", exc)

        # validate index
        if self.index:
            # coerce data type using index schema copy to prevent mutation
            # of original index schema attribute.
            _index = copy.deepcopy(self.index)
            _index.coerce = _index.coerce or self.coerce
            try:
                check_obj = _index(
                    check_obj, head, tail, sample, random_state, lazy, inplace
                )
            except errors.SchemaError as exc:
                error_handler.collect_error("dtype_coercion_error", exc)
            except errors.SchemaErrors as err:
                for schema_error_dict in err.schema_errors:
                    error_handler.collect_error(
                        "index_check", schema_error_dict["error"]
                    )

        # validate series
        try:
            super().validate(
                check_obj, head, tail, sample, random_state, lazy, inplace
            )
        except errors.SchemaErrors as err:
            for schema_error_dict in err.schema_errors:
                error_handler.collect_error(
                    "series_check", schema_error_dict["error"]
                )

        if error_handler.collected_errors:
            raise errors.SchemaErrors(
                error_handler.collected_errors, check_obj
            )

        return check_obj

    def __call__(
        self,
        check_obj: pd.Series,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> pd.Series:
        """Alias for :func:`SeriesSchema.validate` method."""
        return self.validate(
            check_obj, head, tail, sample, random_state, lazy, inplace
        )

    def __eq__(self, other):
        return self.__dict__ == other.__dict__


def _pandas_obj_to_validate(
    dataframe_or_series: Union[pd.DataFrame, pd.Series],
    head: Optional[int],
    tail: Optional[int],
    sample: Optional[int],
    random_state: Optional[int],
) -> Union[pd.DataFrame, pd.Series]:
    pandas_obj_subsample = []
    if head is not None:
        pandas_obj_subsample.append(dataframe_or_series.head(head))
    if tail is not None:
        pandas_obj_subsample.append(dataframe_or_series.tail(tail))
    if sample is not None:
        pandas_obj_subsample.append(
            dataframe_or_series.sample(sample, random_state=random_state)
            # ... (excerpt truncated here)
```
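
Because `_pandas_obj_to_validate` is private, you normally reach it through the `head`, `tail`, `sample`, and `random_state` parameters of `validate` (or the schema's `__call__` alias), whose signatures appear in the listing above. Here is a small usage sketch; the schema and data are made up for illustration:

```python
import pandas as pd
import pandera as pa

schema = pa.DataFrameSchema({"price": pa.Column(float, pa.Check.gt(0))})
df = pd.DataFrame({"price": [1.0, 2.5, 3.2, 10.0, 0.5]})

# validate() forwards head/tail/sample/random_state to
# _pandas_obj_to_validate, so checks run only on the subsampled rows.
validated = schema.validate(df, head=2, tail=2, sample=1, random_state=0)

# equivalent, via the __call__ alias:
validated = schema(df, head=2, tail=2, sample=1, random_state=0)
```

Note that `validate` still returns the full `check_obj`; subsampling only limits which rows the checks are evaluated against.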
