Best Python code snippet using pandera_python
strategies.py
Source:strategies.py  
# NOTE: this listing starts mid-file. The statements below are the tail of a
# definition whose ``def`` line is not visible here; they are kept verbatim:
#
#     for column in val:
#         val[column] = _mask(val[column], null_mask[column])
#     return val


@composite
def set_pandas_index(
    draw,
    df_or_series_strat: SearchStrategy,
    index: IndexComponent,
):
    """Sets Index or MultiIndex object to pandas Series or DataFrame."""
    df_or_series = draw(df_or_series_strat)
    # the index strategy is sized to match the number of rows already drawn
    df_or_series.index = draw(index.strategy(size=df_or_series.shape[0]))
    return df_or_series


def verify_dtype(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    schema_type: str,
    name: Optional[str],
):
    """Verify that pandera_dtype argument is not None.

    :param pandera_dtype: dtype to verify.
    :param schema_type: kind of schema, only used in the error message.
    :param name: name of the schema, only used in the error message.
    :raises SchemaDefinitionError: if ``pandera_dtype`` is None.
    """
    if pandera_dtype is None:
        raise SchemaDefinitionError(
            f"'{schema_type}' schema with name '{name}' has no specified "
            "dtype. You need to specify one in order to synthesize "
            "data from a strategy."
        )


def strategy_import_error(fn: F) -> F:
    """Decorator to generate import error if dependency is missing."""

    @wraps(fn)
    def _wrapper(*args, **kwargs):
        if not HAS_HYPOTHESIS:  # pragma: no cover
            raise ImportError(
                'Strategies for generating data requires "hypothesis" to be \n'
                "installed. You can install pandera together with the "
                "strategies \n"
                "dependencies with:\n"
                "pip install pandera[strategies]"
            )
        return fn(*args, **kwargs)

    return cast(F, _wrapper)


def register_check_strategy(strategy_fn: StrategyFn):
    """Decorate a Check method with a strategy.

    This should be applied to a built-in :class:`~pandera.checks.Check` method.

    :param strategy_fn: add strategy to a check, using check statistics to
        generate a ``hypothesis`` strategy.
    """

    def register_check_strategy_decorator(class_method):
        """Decorator that wraps Check class method."""

        @wraps(class_method)
        def _wrapper(cls, *args, **kwargs):
            check = class_method(cls, *args, **kwargs)
            if check.statistics is None:
                raise AttributeError(
                    "check object doesn't have a defined statistics property. "
                    "Use the checks.register_check_statistics decorator to "
                    f"specify the statistics for the {class_method.__name__} "
                    "method."
                )
            # statistics that are None are not forwarded, so the strategy
            # function only receives the statistics that were actually set.
            strategy_kwargs = {
                arg: stat
                for arg, stat in check.statistics.items()
                if stat is not None
            }
            check.strategy = partial(strategy_fn, **strategy_kwargs)
            return check

        return _wrapper

    return register_check_strategy_decorator


# pylint: disable=line-too-long
# Values taken from
# https://hypothesis.readthedocs.io/en/latest/_modules/hypothesis/extra/numpy.html#from_dtype  # noqa
MIN_DT_VALUE = -(2**63)
MAX_DT_VALUE = 2**63 - 1


def _is_datetime_tz(pandera_dtype: DataType) -> bool:
    """Return True if the dtype's ``type`` is a ``pd.DatetimeTZDtype``."""
    native_type = getattr(pandera_dtype, "type", None)
    return isinstance(native_type, pd.DatetimeTZDtype)


def _datetime_strategy(
    dtype: Union[np.dtype, pd.DatetimeTZDtype], strategy
) -> SearchStrategy:
    """Map values drawn from ``strategy`` onto datetime/timedelta values.

    :param dtype: numpy time dtype or pandas timezone-aware dtype.
    :param strategy: strategy producing the raw values to convert.
    :returns: ``hypothesis`` strategy
    """
    if isinstance(dtype, pd.DatetimeTZDtype):

        def _to_datetime(value) -> pd.Timestamp:
            if isinstance(value, pd.Timestamp):
                return value.tz_convert(tz=dtype.tz)  # type: ignore[union-attr]
            return pd.Timestamp(value, unit=dtype.unit, tz=dtype.tz)  # type: ignore[union-attr]

        return st.builds(_to_datetime, strategy)
    else:
        # use the resolution embedded in the dtype string (e.g. "<M8[ns]")
        # when there is one, otherwise sample a resolution.
        res = (
            st.just(dtype.str.split("[")[-1][:-1])
            if "[" in dtype.str
            else st.sampled_from(npst.TIME_RESOLUTIONS)
        )
        return st.builds(dtype.type, strategy, res)


def numpy_time_dtypes(
    dtype: Union[np.dtype, pd.DatetimeTZDtype], min_value=None, max_value=None
):
    """Create numpy strategy for datetime and timedelta data types.

    :param dtype: numpy datetime or timedelta datatype
    :param min_value: minimum value of the datatype to create
    :param max_value: maximum value of the datatype to create
    :returns: ``hypothesis`` strategy
    """

    def _to_unix(value: Any) -> int:
        if dtype.type is np.timedelta64:
            return pd.Timedelta(value).value
        return pd.Timestamp(value).value

    min_value = MIN_DT_VALUE if min_value is None else _to_unix(min_value)
    max_value = MAX_DT_VALUE if max_value is None else _to_unix(max_value)
    return _datetime_strategy(dtype, st.integers(min_value, max_value))


def numpy_complex_dtypes(
    dtype,
    min_value: complex = complex(0, 0),
    max_value: Optional[complex] = None,
    allow_infinity: Optional[bool] = None,
    allow_nan: Optional[bool] = None,
):
    """Create numpy strategy for complex numbers.

    :param dtype: numpy complex number datatype
    :param min_value: minimum value, must be complex number
    :param max_value: maximum value, must be complex number
    :param allow_infinity: forwarded to the underlying float strategies.
    :param allow_nan: forwarded to the underlying float strategies.
    :returns: ``hypothesis`` strategy
    """
    max_real: Optional[float]
    max_imag: Optional[float]
    # compare against None explicitly: complex(0, 0) is falsy but is still a
    # legitimate upper bound.
    if max_value is not None:
        max_real = max_value.real
        max_imag = max_value.imag
    else:
        max_real = max_imag = None
    if dtype.itemsize == 8:
        width = 32
    else:
        width = 64
    # switch min and max values for imaginary if min value > max value
    if max_imag is not None and min_value.imag > max_imag:
        min_imag = max_imag
        max_imag = min_value.imag
    else:
        min_imag = min_value.imag
    strategy = st.builds(
        complex,
        st.floats(
            min_value=min_value.real,
            max_value=max_real,
            width=width,
            allow_infinity=allow_infinity,
            allow_nan=allow_nan,
        ),
        st.floats(
            min_value=min_imag,
            max_value=max_imag,
            width=width,
            allow_infinity=allow_infinity,
            allow_nan=allow_nan,
        ),
    ).map(dtype.type)

    @st.composite
    def build_complex(draw):
        value = draw(strategy)
        hypothesis.assume(min_value <= value)
        if max_value is not None:
            hypothesis.assume(max_value >= value)
        return value

    return build_complex()


def to_numpy_dtype(pandera_dtype: DataType):
    """Convert a :class:`~pandera.dtypes.DataType` to numpy dtype compatible
    with hypothesis.

    :raises TypeError: if the dtype cannot be converted and is not a datetime.
    """
    try:
        np_dtype = pandas_engine.Engine.numpy_dtype(pandera_dtype)
    except TypeError as err:
        if is_datetime(pandera_dtype):
            return np.dtype("datetime64[ns]")
        raise TypeError(
            f"Data generation for the '{pandera_dtype}' data type is "
            "currently unsupported."
        ) from err
    if np_dtype == np.dtype("object") or str(pandera_dtype) == "str":
        np_dtype = np.dtype(str)
    return np_dtype


def pandas_dtype_strategy(
    pandera_dtype: DataType,
    strategy: Optional[SearchStrategy] = None,
    **kwargs,
) -> SearchStrategy:
    # pylint: disable=line-too-long,no-else-raise
    """Strategy to generate data from a :class:`pandera.dtypes.DataType`.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :kwargs: key-word arguments passed into
        `hypothesis.extra.numpy.from_dtype <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.numpy.from_dtype>`_ .
        For datetime, timedelta, and complex number datatypes, these arguments
        are passed into :func:`~pandera.strategies.numpy_time_dtypes` and
        :func:`~pandera.strategies.numpy_complex_dtypes`.
    :returns: ``hypothesis`` strategy
    """

    def compat_kwargs(*args):
        return {k: v for k, v in kwargs.items() if k in args}

    # hypothesis doesn't support categoricals or objects, so we'll need to
    # build a pandera-specific solution.
    if is_category(pandera_dtype):
        raise TypeError(
            "data generation for the Category dtype is currently "
            "unsupported. Consider using a string or int dtype and "
            "Check.isin(values) to ensure a finite set of values."
        )
    np_dtype = to_numpy_dtype(pandera_dtype)
    if strategy:
        if _is_datetime_tz(pandera_dtype):
            return _datetime_strategy(pandera_dtype.type, strategy)  # type: ignore
        return strategy.map(np_dtype.type)
    elif is_datetime(pandera_dtype) or is_timedelta(pandera_dtype):
        return numpy_time_dtypes(
            pandera_dtype.type if _is_datetime_tz(pandera_dtype) else np_dtype,  # type: ignore
            **compat_kwargs("min_value", "max_value"),
        )
    elif is_complex(pandera_dtype):
        return numpy_complex_dtypes(
            np_dtype,
            **compat_kwargs(
                "min_value", "max_value", "allow_infinity", "allow_nan"
            ),
        )
    return npst.from_dtype(
        np_dtype,
        **{  # type: ignore
            "allow_nan": False,
            "allow_infinity": False,
            **kwargs,
        },
    )


def eq_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    value: Any,
) -> SearchStrategy:
    """Strategy to generate a single value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param value: value to generate.
    :returns: ``hypothesis`` strategy
    """
    # override strategy preceding this one and generate value of the same type
    # pylint: disable=unused-argument
    return pandas_dtype_strategy(pandera_dtype, st.just(value))


def ne_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    value: Any,
) -> SearchStrategy:
    """Strategy to generate anything except for a particular value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param value: value to avoid.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(pandera_dtype)
    return strategy.filter(lambda x: x != value)


def gt_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values greater than a minimum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: generate values larger than this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(
            pandera_dtype,
            min_value=min_value,
            exclude_min=True if is_float(pandera_dtype) else None,
        )
    return strategy.filter(lambda x: x > min_value)


def ge_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values greater than or equal to a minimum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: generate values greater than or equal to this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype,
            min_value=min_value,
            exclude_min=False if is_float(pandera_dtype) else None,
        )
    return strategy.filter(lambda x: x >= min_value)


def lt_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    max_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values less than a maximum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param max_value: generate values less than this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(
            pandera_dtype,
            max_value=max_value,
            exclude_max=True if is_float(pandera_dtype) else None,
        )
    return strategy.filter(lambda x: x < max_value)


def le_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    max_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values less than or equal to a maximum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param max_value: generate values less than or equal to this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype,
            max_value=max_value,
            exclude_max=False if is_float(pandera_dtype) else None,
        )
    return strategy.filter(lambda x: x <= max_value)


def in_range_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Union[int, float],
    max_value: Union[int, float],
    include_min: bool = True,
    include_max: bool = True,
) -> SearchStrategy:
    """Strategy to generate values within a particular range.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: generate values greater than this.
    :param max_value: generate values less than this.
    :param include_min: include min_value in generated data.
    :param include_max: include max_value in generated data.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(
            pandera_dtype,
            min_value=min_value,
            max_value=max_value,
            exclude_min=not include_min,
            exclude_max=not include_max,
        )
        if include_min and include_max:
            return strategy
        # exclude_min/exclude_max are only honoured for float dtypes by the
        # underlying strategy, so fall through and enforce the exclusive
        # bound(s) with a filter, as gt_strategy and lt_strategy do.
    min_op = operator.ge if include_min else operator.gt
    max_op = operator.le if include_max else operator.lt
    return strategy.filter(
        lambda x: min_op(x, min_value) and max_op(x, max_value)
    )


def isin_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    allowed_values: Sequence[Any],
) -> SearchStrategy:
    """Strategy to generate values within a finite set.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param allowed_values: set of allowable values.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype, st.sampled_from(allowed_values)
        )
    return strategy.filter(lambda x: x in allowed_values)


def notin_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    forbidden_values: Sequence[Any],
) -> SearchStrategy:
    """Strategy to generate values excluding a set of forbidden values

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param forbidden_values: set of forbidden values.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(pandera_dtype)
    return strategy.filter(lambda x: x not in forbidden_values)


def str_matches_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    pattern: str,
) -> SearchStrategy:
    """Strategy to generate strings that match a regex pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param pattern: regex pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return st.from_regex(pattern, fullmatch=True).map(
            to_numpy_dtype(pandera_dtype).type
        )

    def matches(x):
        return re.match(pattern, x)

    return strategy.filter(matches)


def str_contains_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    pattern: str,
) -> SearchStrategy:
    """Strategy to generate strings that contain a particular pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param pattern: regex pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return st.from_regex(pattern, fullmatch=False).map(
            to_numpy_dtype(pandera_dtype).type
        )

    def contains(x):
        return re.search(pattern, x)

    return strategy.filter(contains)


def str_startswith_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    string: str,
) -> SearchStrategy:
    """Strategy to generate strings that start with a specific string pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param string: string pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        # ``string`` is a literal prefix (the filter branch below uses
        # str.startswith), so escape it before embedding it in a regex.
        return st.from_regex(
            f"\\A{re.escape(string)}", fullmatch=False
        ).map(to_numpy_dtype(pandera_dtype).type)
    return strategy.filter(lambda x: x.startswith(string))


def str_endswith_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    string: str,
) -> SearchStrategy:
    """Strategy to generate strings that end with a specific string pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param string: string pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        # ``string`` is a literal suffix (the filter branch below uses
        # str.endswith), so escape it before embedding it in a regex.
        return st.from_regex(
            f"{re.escape(string)}\\Z", fullmatch=False
        ).map(to_numpy_dtype(pandera_dtype).type)
    return strategy.filter(lambda x: x.endswith(string))


def str_length_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Optional[int] = None,
    max_value: Optional[int] = None,
) -> SearchStrategy:
    """Strategy to generate strings of a particular length

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: minimum string length. ``None`` means no lower bound.
    :param max_value: maximum string length. ``None`` means no upper bound.
    :returns: ``hypothesis`` strategy
    """
    # Either bound may be absent: register_check_strategy does not forward
    # statistics that are None, so both arguments need a default.
    if strategy is None:
        return st.text(
            min_size=0 if min_value is None else min_value,
            max_size=max_value,
        ).map(to_numpy_dtype(pandera_dtype).type)

    def _length_in_bounds(x):
        length = len(x)
        if min_value is not None and length < min_value:
            return False
        return max_value is None or length <= max_value

    return strategy.filter(_length_in_bounds)


def _timestamp_to_datetime64_strategy(
    strategy: SearchStrategy,
) -> SearchStrategy:
    """Convert timestamp to numpy.datetime64

    Hypothesis only supports pure numpy dtypes but numpy.datetime64() truncates
    nanoseconds if given a pandas.Timestamp. We need to pass the unix epoch via
    the pandas.Timestamp.value attribute.
    """
    return st.builds(lambda x: np.datetime64(x.value, "ns"), strategy)


def field_element_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
) -> SearchStrategy:
    """Strategy to generate elements of a column or index.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :returns: ``hypothesis`` strategy
    :raises BaseStrategyOnlyError: if ``strategy`` is specified.
    """
    if strategy:
        raise BaseStrategyOnlyError(
            "The series strategy is a base strategy. You cannot specify the "
            "strategy argument to chain it to a parent strategy."
        )
    checks = [] if checks is None else checks
    elements = None

    def undefined_check_strategy(elements, check):
        """Strategy for checks with undefined strategies."""
        warnings.warn(
            "Element-wise check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data-generation."
        )
        return (
            pandas_dtype_strategy(pandera_dtype)
            if elements is None
            else elements
        ).filter(check._check_fn)

    for check in checks:
        if hasattr(check, "strategy"):
            elements = check.strategy(pandera_dtype, elements)
        elif check.element_wise:
            elements = undefined_check_strategy(elements, check)
        # NOTE: vectorized checks with undefined strategies should be handled
        # by the series/dataframe strategy.
    if elements is None:
        elements = pandas_dtype_strategy(pandera_dtype)
    # Hypothesis only supports pure numpy datetime64 (i.e. timezone naive).
    # We cast to datetime64 after applying the check strategy so that checks
    # can see timezone-aware values.
    if _is_datetime_tz(pandera_dtype):
        elements = _timestamp_to_datetime64_strategy(elements)
    return elements


def series_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    nullable: bool = False,
    unique: bool = False,
    name: Optional[str] = None,
    size: Optional[int] = None,
):
    """Strategy to generate a pandas Series.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param nullable: whether or not generated Series contains null values.
    :param unique: whether or not generated Series contains unique values.
    :param name: name of the Series.
    :param size: number of elements in the Series.
    :returns: ``hypothesis`` strategy.
    """
    elements = field_element_strategy(pandera_dtype, strategy, checks=checks)
    strategy = (
        pdst.series(
            elements=elements,
            dtype=to_numpy_dtype(pandera_dtype),
            index=pdst.range_indexes(
                min_size=0 if size is None else size, max_size=size
            ),
            unique=unique,
        )
        .filter(lambda x: x.shape[0] > 0)
        .map(lambda x: x.rename(name))
        .map(lambda x: x.astype(pandera_dtype.type))
    )
    if nullable:
        strategy = null_field_masks(strategy)

    def undefined_check_strategy(strategy, check):
        """Strategy for checks with undefined strategies."""
        warnings.warn(
            "Vectorized check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data-generation."
        )

        def _check_fn(series):
            return check(series).check_passed

        return strategy.filter(_check_fn)

    for check in checks if checks is not None else []:
        if not hasattr(check, "strategy") and not check.element_wise:
            strategy = undefined_check_strategy(strategy, check)
    return strategy


def column_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    unique: bool = False,
    name: Optional[str] = None,
):
    # pylint: disable=line-too-long
    """Create a data object describing a column in a DataFrame.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param unique: whether or not generated Series contains unique values.
    :param name: name of the Series.
    :returns: a `column <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.pandas.column>`_ object.
    """
    verify_dtype(pandera_dtype, schema_type="column", name=name)
    elements = field_element_strategy(pandera_dtype, strategy, checks=checks)
    return pdst.column(
        name=name,
        elements=elements,
        dtype=to_numpy_dtype(pandera_dtype),
        unique=unique,
    )


def index_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    nullable: bool = False,
    unique: bool = False,
    name: Optional[str] = None,
    size: Optional[int] = None,
):
    """Strategy to generate a pandas Index.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param nullable: whether or not generated Series contains null values.
    :param unique: whether or not generated Series contains unique values.
    :param name: name of the Series.
    :param size: number of elements in the Series.
    :returns: ``hypothesis`` strategy.
    """
    verify_dtype(pandera_dtype, schema_type="index", name=name)
    elements = field_element_strategy(pandera_dtype, strategy, checks=checks)
    strategy = pdst.indexes(
        elements=elements,
        dtype=to_numpy_dtype(pandera_dtype),
        min_size=0 if size is None else size,
        max_size=size,
        unique=unique,
    ).map(lambda x: x.astype(pandera_dtype.type))
    # this is a hack to convert np.str_ data values into native python str.
    col_dtype = str(pandera_dtype)
    if col_dtype in {"object", "str"} or col_dtype.startswith("string"):
        # pylint: disable=cell-var-from-loop,undefined-loop-variable
        strategy = strategy.map(lambda index: index.map(str))
    if name is not None:
        strategy = strategy.map(lambda index: index.rename(name))
    if nullable:
        strategy = null_field_masks(strategy)
    return strategy


def dataframe_strategy(
    pandera_dtype: Optional[DataType] = None,
    strategy: Optional[SearchStrategy] = None,
    *,
    columns: Optional[Dict] = None,
    checks: Optional[Sequence] = None,
    unique: Optional[List[str]] = None,
    index: Optional[IndexComponent] = None,
    size: Optional[int] = None,
    n_regex_columns: int = 1,
):
    """Strategy to generate a pandas DataFrame.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: if specified, this will raise a BaseStrategyOnlyError,
        since it cannot be chained to a prior strategy.
    :param columns: a dictionary where keys are column names and values
        are :class:`~pandera.schema_components.Column` objects.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data at the dataframe level.
    :param unique: a list of column names that should be jointly unique.
    :param index: Index or MultiIndex schema component.
    :param size: number of elements in the Series.
    :param n_regex_columns: number of regex columns to generate.
    :returns: ``hypothesis`` strategy.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    if n_regex_columns < 1:
        raise ValueError(
            "`n_regex_columns` must be a positive integer, found: "
            f"{n_regex_columns}"
        )
    if strategy:
        raise BaseStrategyOnlyError(
            "The dataframe strategy is a base strategy. You cannot specify "
            "the strategy argument to chain it to a parent strategy."
        )
    columns = {} if columns is None else columns
    checks = [] if checks is None else checks

    def undefined_check_strategy(strategy, check, column=None):
        """Strategy for checks with undefined strategies."""

        def _element_wise_check_fn(element):
            return check._check_fn(element)

        def _column_check_fn(dataframe):
            return check(dataframe[column]).check_passed

        def _dataframe_check_fn(dataframe):
            return check(dataframe).check_passed

        if check.element_wise:
            check_fn = _element_wise_check_fn
            warning_type = "Element-wise"
        elif column is None:
            check_fn = _dataframe_check_fn
            warning_type = "Dataframe"
        else:
            check_fn = _column_check_fn
            warning_type = "Column"
        warnings.warn(
            f"{warning_type} check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data-generation."
        )
        return strategy.filter(check_fn)

    def make_row_strategy(col, checks):
        strategy = None
        for check in checks:
            if hasattr(check, "strategy"):
                strategy = check.strategy(col.dtype, strategy)
            else:
                strategy = undefined_check_strategy(
                    strategy=(
                        pandas_dtype_strategy(col.dtype)
                        if strategy is None
                        else strategy
                    ),
                    check=check,
                )
        if strategy is None:
            strategy = pandas_dtype_strategy(col.dtype)
        return strategy

    @composite
    def _dataframe_strategy(draw):
        row_strategy_checks = []
        undefined_strat_df_checks = []
        for check in checks:
            if hasattr(check, "strategy") or check.element_wise:
                # we can apply element-wise checks defined at the dataframe
                # level to the row strategy
                row_strategy_checks.append(check)
            else:
                undefined_strat_df_checks.append(check)
        # expand column set to generate column names for columns where
        # regex=True.
        expanded_columns = {}
        for col_name, column in columns.items():
            if unique and col_name in unique:
                # if the column is in the set of columns specified in `unique`,
                # make the column strategy independently unique. This is
                # technically stricter than it should be, since the list of
                # columns in `unique` are required to be jointly unique, but
                # this is a simple solution that produces synthetic data that
                # fulfills the uniqueness constraints of the dataframe.
                column = deepcopy(column)
                column.unique = True
            if not column.regex:
                expanded_columns[col_name] = column
            else:
                regex_columns = draw(
                    st.lists(
                        st.from_regex(column.name, fullmatch=True),
                        min_size=n_regex_columns,
                        max_size=n_regex_columns,
                        unique=True,
                    )
                )
                for regex_col in regex_columns:
                    expanded_columns[regex_col] = deepcopy(column).set_name(
                        regex_col
                    )
        # collect all non-element-wise column checks with undefined strategies
        undefined_strat_column_checks: Dict[str, list] = defaultdict(list)
        for col_name, column in expanded_columns.items():
            undefined_strat_column_checks[col_name].extend(
                check
                for check in column.checks
                if not hasattr(check, "strategy") and not check.element_wise
            )
        # override the column datatype with dataframe-level datatype if
        # specified
        col_dtypes = {
            col_name: str(col.dtype)
            if pandera_dtype is None
            else str(pandera_dtype)
            for col_name, col in expanded_columns.items()
        }
        nullable_columns = {
            col_name: col.nullable
            for col_name, col in expanded_columns.items()
        }
        row_strategy = None
        if row_strategy_checks:
            # NOTE: the listing is truncated here; the body of this branch and
            # the remainder of the function are not visible.
            ...
This is885                # technically stricter than it should be, since the list of886                # columns in `unique` are required to be jointly unique, but887                # this is a simple solution that produces synthetic data that888                # fulfills the uniqueness constraints of the dataframe.889                column = deepcopy(column)890                column.unique = True891            if not column.regex:892                expanded_columns[col_name] = column893            else:894                regex_columns = draw(895                    st.lists(896                        st.from_regex(column.name, fullmatch=True),897                        min_size=n_regex_columns,898                        max_size=n_regex_columns,899                        unique=True,900                    )901                )902                for regex_col in regex_columns:903                    expanded_columns[regex_col] = deepcopy(column).set_name(904                        regex_col905                    )906        # collect all non-element-wise column checks with undefined strategies907        undefined_strat_column_checks: Dict[str, list] = defaultdict(list)908        for col_name, column in expanded_columns.items():909            undefined_strat_column_checks[col_name].extend(910                check911                for check in column.checks912                if not hasattr(check, "strategy") and not check.element_wise913            )914        # override the column datatype with dataframe-level datatype if915        # specified916        col_dtypes = {917            col_name: str(col.dtype)918            if pandera_dtype is None919            else str(pandera_dtype)920            for col_name, col in expanded_columns.items()921        }922        nullable_columns = {923            col_name: col.nullable924            for col_name, col in expanded_columns.items()925        }926        row_strategy = None927        if row_strategy_checks:928            
row_strategy = st.fixed_dictionaries(929                {930                    col_name: make_row_strategy(col, row_strategy_checks)931                    for col_name, col in expanded_columns.items()932                }933            )934        strategy = pdst.data_frames(935            columns=[936                column.strategy_component()937                for column in expanded_columns.values()938            ],939            rows=row_strategy,940            index=pdst.range_indexes(941                min_size=0 if size is None else size, max_size=size942            ),943        )944        # this is a hack to convert np.str_ data values into native python str.945        for col_name, col_dtype in col_dtypes.items():946            if col_dtype in {"object", "str"} or col_dtype.startswith(947                "string"948            ):949                # pylint: disable=cell-var-from-loop,undefined-loop-variable950                strategy = strategy.map(951                    lambda df: df.assign(**{col_name: df[col_name].map(str)})952                )953        strategy = strategy.map(954            lambda df: df if df.empty else df.astype(col_dtypes)955        )956        if size is not None and size > 0 and any(nullable_columns.values()):957            strategy = null_dataframe_masks(strategy, nullable_columns)958        if index is not None:959            strategy = set_pandas_index(strategy, index)960        for check in undefined_strat_df_checks:961            strategy = undefined_check_strategy(strategy, check)962        for col_name, column_checks in undefined_strat_column_checks.items():963            for check in column_checks:  # type: ignore964                strategy = undefined_check_strategy(965                    strategy, check, column=col_name966                )967        return draw(strategy)968    return _dataframe_strategy()969# pylint: disable=unused-argument970def multiindex_strategy(971    pandera_dtype: Optional[DataType] = None,972    
strategy: Optional[SearchStrategy] = None,973    *,...Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
