Best Python code snippet using pandera_python
utils.py
Source:utils.py  
# NOTE(review): lines 1-3 of utils.py are elided in this excerpt; they
# presumably import `Any` and `Union` from `typing` -- confirm against the
# full module.
import numpy as np
import pandas as pd

from .. import check_utils
from .type_aliases import PandasObject


def numpy_pandas_coercible(series: pd.Series, type_: Any) -> pd.Series:
    """Check element-wise whether a series can be coerced to ``type_``.

    The series is bisected repeatedly: any slice that fails to coerce is
    split in half and each half is retried, until every failing slice has
    been narrowed down to a single element.

    NOTE: this does not account for koalas .astype behavior, which defaults
    to converting uncastable values to NA values.

    :param series: the series whose values are tested.
    :param type_: anything accepted by ``pandas_engine.Engine.dtype``.
    :returns: a boolean series sharing ``series``' index, ``True`` where the
        value is coercible. Failures are tracked by index *label*, so every
        row that shares a label with a failing row is reported as a failure.
        An empty input yields an empty result.
    """
    # pylint: disable=import-outside-toplevel,cyclic-import
    from pandera.engines import pandas_engine

    data_type = pandas_engine.Engine.dtype(type_)

    def _bisect(series):
        """Split a series of length >= 2 into its two halves."""
        assert (
            series.shape[0] >= 2
        ), "cannot bisect a pandas Series of length < 2"
        bisect_index = series.shape[0] // 2
        return [series.iloc[:bisect_index], series.iloc[bisect_index:]]

    def _coercible(series):
        """Return True when coercing the slice raises no exception."""
        try:
            data_type.coerce(series)
            return True
        except Exception:  # pylint:disable=broad-except
            # Any error raised by the coercion counts as "not coercible".
            return False

    n_rows = series.shape[0]
    if n_rows == 0:
        # Nothing to check; bisecting an empty series would trip the
        # length assertion in _bisect.
        search_list = []
    elif n_rows == 1:
        search_list = [series]
    else:
        search_list = _bisect(series)

    failure_index = []
    while search_list:
        candidates = []
        for _series in search_list:
            # Attempt the coercion exactly once per slice.
            if _coercible(_series):
                continue
            if _series.shape[0] == 1:
                # if series is reduced to a single value and isn't
                # coercible, keep track of its index value.
                failure_index.append(_series.index.item())
            else:
                # if the series length > 1, add it to the candidates list
                # to be further bisected
                candidates.append(_series)
        # the new search list is a flat list of bisected series views.
        search_list = [half for c in candidates for half in _bisect(c)]

    # NOTE: this is a hack to support koalas. This needs to be thoroughly
    # tested, right now koalas returns NA when a dtype value can't be coerced
    # into the target dtype.
    # NOTE(review): unlike the pandas path below, the mask built here is not
    # negated -- confirm whether that is intended.
    if type(series).__module__.startswith(
        "databricks.koalas"
    ):  # pragma: no cover
        out = type(series)(
            series.index.isin(failure_index).to_series().to_numpy(),  # type: ignore[union-attr]
            index=series.index.values.to_numpy(),
            name=series.name,
        )
        out.index.name = series.index.name
        return out
    return pd.Series(~series.index.isin(failure_index), index=series.index)


def numpy_pandas_coerce_failure_cases(
    data_container: Union[PandasObject, np.ndarray], type_: Any
) -> PandasObject:
    """
    Get the failure cases resulting from trying to coerce a pandas/numpy object
    into particular data type.

    :param data_container: a pandas Series, Index or DataFrame, or a numpy
        array of 1 or 2 dimensions (converted to a Series or DataFrame
        respectively before checking).
    :param type_: anything accepted by ``pandas_engine.Engine.dtype``.
    :returns: the output of ``error_formatters.reshape_failure_cases`` for
        the values that could not be coerced.
    :raises ValueError: if a numpy array has more than 2 dimensions.
    :raises TypeError: if ``data_container`` is not a recognized type.
    """
    # pylint: disable=import-outside-toplevel,cyclic-import
    from pandera import error_formatters
    from pandera.engines import pandas_engine

    data_type = pandas_engine.Engine.dtype(type_)

    # Normalize numpy input to the equivalent pandas container.
    if isinstance(data_container, np.ndarray):
        if len(data_container.shape) == 1:
            data_container = pd.Series(data_container)
        elif len(data_container.shape) == 2:
            data_container = pd.DataFrame(data_container)
        else:
            raise ValueError(
                "only numpy arrays of 1 or 2 dimensions are supported"
            )

    # An index is checked as if it were a series of its own values.
    if check_utils.is_index(data_container):
        data_container = data_container.to_series()  # type: ignore[union-attr]

    if check_utils.is_table(data_container):
        # Column-wise check: each column is passed through
        # numpy_pandas_coercible.
        check_output = data_container.apply(  # type: ignore[union-attr]
            numpy_pandas_coercible,
            args=(data_type,),
        )
        _, failure_cases = check_utils.prepare_dataframe_check_output(
            data_container,
            check_output,
            ignore_na=False,
        )
    elif check_utils.is_field(data_container):
        check_output = numpy_pandas_coercible(data_container, data_type)
        _, failure_cases = check_utils.prepare_series_check_output(
            data_container,
            check_output,
            ignore_na=False,
        )
    else:
        raise TypeError(
            f"type of data_container {type(data_container)} not understood. "
            "Must be a pandas Series, Index, or DataFrame."
        )
    # NOTE(review): the excerpt is cut off right after `ignore_na=False`; the
    # call is closed here on the assumption that no further arguments
    # follow -- confirm against the full module.
    return error_formatters.reshape_failure_cases(
        failure_cases, ignore_na=False
    )


# --- Page text that followed the excerpt (not part of utils.py) ---
# Learn to execute automation testing from scratch with LambdaTest Learning
# Hub. Right from setting up the prerequisites to run your first automation
# test, to following best practices and diving deeper into advanced test
# scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides
# to help you be proficient with different test automation frameworks i.e.
# Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!
