Best Python code snippet using pandera_python
checks.py
Source:checks.py  
1"""Data validation checks."""2import inspect3import operator4import re5from collections import ChainMap, namedtuple6from functools import partial, wraps7from itertools import chain8from typing import (9    Any,10    Callable,11    Dict,12    Iterable,13    List,14    Optional,15    Type,16    TypeVar,17    Union,18    no_type_check,19)20import pandas as pd21from . import check_utils, constants, errors22from . import strategies as st23CheckResult = namedtuple(24    "CheckResult",25    ["check_output", "check_passed", "checked_object", "failure_cases"],26)27GroupbyObject = Union[28    pd.core.groupby.SeriesGroupBy, pd.core.groupby.DataFrameGroupBy29]30SeriesCheckObj = Union[pd.Series, Dict[str, pd.Series]]31DataFrameCheckObj = Union[pd.DataFrame, Dict[str, pd.DataFrame]]32def register_check_statistics(statistics_args):33    """Decorator to set statistics based on Check method."""34    def register_check_statistics_decorator(class_method):35        @wraps(class_method)36        def _wrapper(cls, *args, **kwargs):37            args = list(args)38            arg_names = inspect.getfullargspec(class_method).args[1:]39            if not arg_names:40                arg_names = statistics_args41            args_dict = {**dict(zip(arg_names, args)), **kwargs}42            check = class_method(cls, *args, **kwargs)43            check.statistics = {44                stat: args_dict.get(stat) for stat in statistics_args45            }46            check.statistics_args = statistics_args47            return check48        return _wrapper49    return register_check_statistics_decorator50_T = TypeVar("_T", bound="_CheckBase")51class _CheckMeta(type):  # pragma: no cover52    """Check metaclass."""53    REGISTERED_CUSTOM_CHECKS: Dict[str, Callable] = {}  # noqa54    def __getattr__(cls, name: str) -> Any:55        """Prevent attribute errors for registered checks."""56        attr = ChainMap(cls.__dict__, cls.REGISTERED_CUSTOM_CHECKS).get(name)57        if attr is None:58          
  raise AttributeError(59                f"'{cls}' object has no attribute '{name}'. "60                "Make sure any custom checks have been registered "61                "using the extensions api."62            )63        return attr64    def __dir__(cls) -> Iterable[str]:65        """Allow custom checks to show up as attributes when autocompleting."""66        return chain(super().__dir__(), cls.REGISTERED_CUSTOM_CHECKS.keys())67    # pylint: disable=line-too-long68    # mypy has limited metaclass support so this doesn't pass typecheck69    # see https://mypy.readthedocs.io/en/stable/metaclasses.html#gotchas-and-limitations-of-metaclass-support70    # pylint: enable=line-too-long71    @no_type_check72    def __contains__(cls: Type[_T], item: Union[_T, str]) -> bool:73        """Allow lookups for registered checks."""74        if isinstance(item, cls):75            name = item.name76            return hasattr(cls, name)77        # assume item is str78        return hasattr(cls, item)79class _CheckBase(metaclass=_CheckMeta):80    """Check base class."""81    def __init__(82        self,83        check_fn: Union[84            Callable[[pd.Series], Union[pd.Series, bool]],85            Callable[[pd.DataFrame], Union[pd.DataFrame, pd.Series, bool]],86        ],87        groups: Optional[Union[str, List[str]]] = None,88        groupby: Optional[Union[str, List[str], Callable]] = None,89        ignore_na: bool = True,90        element_wise: bool = False,91        name: str = None,92        error: Optional[str] = None,93        raise_warning: bool = False,94        n_failure_cases: Union[int, None] = constants.N_FAILURE_CASES,95        title: Optional[str] = None,96        description: Optional[str] = None,97        **check_kwargs,98    ) -> None:99        """Apply a validation function to each element, Series, or DataFrame.100        :param check_fn: A function to check pandas data structure. 
For Column101            or SeriesSchema checks, if element_wise is True, this function102            should have the signature: ``Callable[[pd.Series],103            Union[pd.Series, bool]]``, where the output series is a boolean104            vector.105            If element_wise is False, this function should have the signature:106            ``Callable[[Any], bool]``, where ``Any`` is an element in the107            column.108            For DataFrameSchema checks, if element_wise=True, fn109            should have the signature: ``Callable[[pd.DataFrame],110            Union[pd.DataFrame, pd.Series, bool]]``, where the output dataframe111            or series contains booleans.112            If element_wise is True, fn is applied to each row in113            the dataframe with the signature ``Callable[[pd.Series], bool]``114            where the series input is a row in the dataframe.115        :param groups: The dict input to the `fn` callable will be constrained116            to the groups specified by `groups`.117        :param groupby: If a string or list of strings is provided, these118            columns are used to group the Column series. 
If a119            callable is passed, the expected signature is: ``Callable[120            [pd.DataFrame], pd.core.groupby.DataFrameGroupBy]``121            The the case of ``Column`` checks, this function has access to the122            entire dataframe, but ``Column.name`` is selected from this123            DataFrameGroupby object so that a SeriesGroupBy object is passed124            into ``check_fn``.125            Specifying the groupby argument changes the ``check_fn`` signature126            to:127            ``Callable[[Dict[Union[str, Tuple[str]], pd.Series]], Union[bool, pd.Series]]``  # noqa128            where the input is a dictionary mapping129            keys to subsets of the column/dataframe.130        :param ignore_na: If True, null values will be ignored when determining131            if a check passed or failed. For dataframes, ignores rows with any132            null value. *New in version 0.4.0*133        :param element_wise: Whether or not to apply validator in an134            element-wise fashion. If bool, assumes that all checks should be135            applied to the column element-wise. If list, should be the same136            number of elements as checks.137        :param name: optional name for the check.138        :param error: custom error message if series fails validation139            check.140        :param raise_warning: if True, raise a UserWarning and do not throw141            exception instead of raising a SchemaError for a specific check.142            This option should be used carefully in cases where a failing143            check is informational and shouldn't stop execution of the program.144        :param n_failure_cases: report the first n unique failure cases. 
If145            None, report all failure cases.146        :param title: A human-readable label for the check.147        :param description: An arbitrary textual description of the check.148        :param check_kwargs: key-word arguments to pass into ``check_fn``149        :example:150        >>> import pandas as pd151        >>> import pandera as pa152        >>>153        >>>154        >>> # column checks are vectorized by default155        >>> check_positive = pa.Check(lambda s: s > 0)156        >>>157        >>> # define an element-wise check158        >>> check_even = pa.Check(lambda x: x % 2 == 0, element_wise=True)159        >>>160        >>> # checks can be given human-readable metadata161        >>> check_with_metadata = pa.Check(162        ...     lambda x: True,163        ...     title="Always passes",164        ...     description="This check always passes."165        ... )166        >>>167        >>> # specify assertions across categorical variables using `groupby`,168        >>> # for example, make sure the mean measure for group "A" is always169        >>> # larger than the mean measure for group "B"170        >>> check_by_group = pa.Check(171        ...     lambda measures: measures["A"].mean() > measures["B"].mean(),172        ...     groupby=["group"],173        ... )174        >>>175        >>> # define a wide DataFrame-level check176        >>> check_dataframe = pa.Check(177        ...     lambda df: df["measure_1"] > df["measure_2"])178        >>>179        >>> measure_checks = [check_positive, check_even, check_by_group]180        >>>181        >>> schema = pa.DataFrameSchema(182        ...     columns={183        ...         "measure_1": pa.Column(int, checks=measure_checks),184        ...         "measure_2": pa.Column(int, checks=measure_checks),185        ...         "group": pa.Column(str),186        ...     },187        ...     checks=check_dataframe188        ... )189        >>>190        >>> df = pd.DataFrame({191        ...     
"measure_1": [10, 12, 14, 16],192        ...     "measure_2": [2, 4, 6, 8],193        ...     "group": ["B", "B", "A", "A"]194        ... })195        >>>196        >>> schema.validate(df)[["measure_1", "measure_2", "group"]]197            measure_1  measure_2 group198        0         10          2     B199        1         12          4     B200        2         14          6     A201        3         16          8     A202        See :ref:`here<checks>` for more usage details.203        """204        if element_wise and groupby is not None:205            raise errors.SchemaInitError(206                "Cannot use groupby when element_wise=True."207            )208        self._check_fn = check_fn209        self._check_kwargs = check_kwargs210        self.element_wise = element_wise211        self.error = error212        self.name = name or getattr(213            self._check_fn, "__name__", self._check_fn.__class__.__name__214        )215        self.ignore_na = ignore_na216        self.raise_warning = raise_warning217        self.n_failure_cases = n_failure_cases218        self.title = title219        self.description = description220        if groupby is None and groups is not None:221            raise ValueError(222                "`groupby` argument needs to be provided when `groups` "223                "argument is defined"224            )225        if isinstance(groupby, str):226            groupby = [groupby]227        self.groupby = groupby228        if isinstance(groups, str):229            groups = [groups]230        self.groups = groups231        self.failure_cases = None232        self._statistics = None233    @property234    def statistics(self) -> Dict[str, Any]:235        """Get check statistics."""236        return getattr(self, "_statistics")237    @statistics.setter238    def statistics(self, statistics):239        """Set check statistics."""240        self._statistics = statistics241    @staticmethod242    def _format_groupby_input(243        
groupby_obj: GroupbyObject,244        groups: Optional[List[str]],245    ) -> Union[Dict[str, Union[pd.Series, pd.DataFrame]]]:246        """Format groupby object into dict of groups to Series or DataFrame.247        :param groupby_obj: a pandas groupby object.248        :param groups: only include these groups in the output.249        :returns: dictionary mapping group names to Series or DataFrame.250        """251        if groups is None:252            return dict(list(groupby_obj))253        group_keys = set(group_key for group_key, _ in groupby_obj)254        invalid_groups = [g for g in groups if g not in group_keys]255        if invalid_groups:256            raise KeyError(257                f"groups {invalid_groups} provided in `groups` argument not a valid group "258                f"key. Valid group keys: {group_keys}"259            )260        return {261            group_key: group262            for group_key, group in groupby_obj263            if group_key in groups264        }265    def _prepare_series_input(266        self,267        df_or_series: Union[pd.Series, pd.DataFrame],268        column: Optional[str] = None,269    ) -> SeriesCheckObj:270        """Prepare input for Column check.271        :param pd.Series series: one-dimensional ndarray with axis labels272            (including time series).273        :param pd.DataFrame dataframe_context: optional dataframe to supply274            when checking a Column in a DataFrameSchema.275        :returns: a Series, or a dictionary mapping groups to Series276            to be used by `_check_fn` and `_vectorized_check`277        """278        if check_utils.is_field(df_or_series):279            return df_or_series280        elif self.groupby is None:281            return df_or_series[column]282        elif isinstance(self.groupby, list):283            return self._format_groupby_input(284                df_or_series.groupby(self.groupby)[column],285                self.groups,286            )287       
 elif callable(self.groupby):288            return self._format_groupby_input(289                self.groupby(df_or_series)[column],290                self.groups,291            )292        raise TypeError("Type %s not recognized for `groupby` argument.")293    def _prepare_dataframe_input(294        self, dataframe: pd.DataFrame295    ) -> DataFrameCheckObj:296        """Prepare input for DataFrameSchema check.297        :param dataframe: dataframe to validate.298        :returns: a DataFrame, or a dictionary mapping groups to pd.DataFrame299            to be used by `_check_fn` and `_vectorized_check`300        """301        if self.groupby is None:302            return dataframe303        groupby_obj = dataframe.groupby(self.groupby)304        return self._format_groupby_input(groupby_obj, self.groups)305    def __call__(306        self,307        df_or_series: Union[pd.DataFrame, pd.Series],308        column: Optional[str] = None,309    ) -> CheckResult:310        # pylint: disable=too-many-branches311        """Validate pandas DataFrame or Series.312        :param df_or_series: pandas DataFrame of Series to validate.313        :param column: for dataframe checks, apply the check function to this314            column.315        :returns: CheckResult tuple containing:316            ``check_output``: boolean scalar, ``Series`` or ``DataFrame``317            indicating which elements passed the check.318            ``check_passed``: boolean scalar that indicating whether the check319            passed overall.320            ``checked_object``: the checked object itself. 
Depending on the321            options provided to the ``Check``, this will be a pandas Series,322            DataFrame, or if the ``groupby`` option is specified, a323            ``Dict[str, Series]`` or ``Dict[str, DataFrame]`` where the keys324            are distinct groups.325            ``failure_cases``: subset of the check_object that failed.326        """327        # prepare check object328        if check_utils.is_field(df_or_series) or (329            column is not None and check_utils.is_table(df_or_series)330        ):331            check_obj = self._prepare_series_input(df_or_series, column)332        elif check_utils.is_table(df_or_series):333            check_obj = self._prepare_dataframe_input(df_or_series)334        else:335            raise ValueError(336                f"object of type {type(df_or_series)} not supported. Must be "337                "a Series, a dictionary of Series, or DataFrame"338            )339        # apply check function to check object340        check_fn = partial(self._check_fn, **self._check_kwargs)341        if self.element_wise:342            check_output = (343                check_obj.apply(check_fn, axis=1)  # type: ignore344                if check_utils.is_table(check_obj)345                else check_obj.map(check_fn)  # type: ignore346                if check_utils.is_field(check_obj)347                else check_fn(check_obj)348            )349        else:350            # vectorized check function case351            check_output = check_fn(check_obj)352        # failure cases only apply when the check function returns a boolean353        # series that matches the shape and index of the check_obj354        if (355            isinstance(check_obj, dict)356            or isinstance(check_output, bool)357            or not check_utils.is_supported_check_obj(check_output)358            or check_obj.shape[0] != check_output.shape[0]359            or (check_obj.index != check_output.index).all()360        ):361     
       failure_cases = None362        elif check_utils.is_field(check_output):363            (364                check_output,365                failure_cases,366            ) = check_utils.prepare_series_check_output(367                check_obj,368                check_output,369                ignore_na=self.ignore_na,370                n_failure_cases=self.n_failure_cases,371            )372        elif check_utils.is_table(check_output):373            (374                check_output,375                failure_cases,376            ) = check_utils.prepare_dataframe_check_output(377                check_obj,378                check_output,379                df_orig=df_or_series,380                ignore_na=self.ignore_na,381                n_failure_cases=self.n_failure_cases,382            )383        else:384            raise TypeError(385                f"output type of check_fn not recognized: {type(check_output)}"386            )387        check_passed = (388            check_output.all()389            if check_utils.is_field(check_output)390            else check_output.all(axis=None)391            if check_utils.is_table(check_output)392            else check_output393        )394        return CheckResult(395            check_output, check_passed, check_obj, failure_cases396        )397    def __eq__(self, other: object) -> bool:398        if not isinstance(other, type(self)):399            return NotImplemented400        are_check_fn_objects_equal = (401            self._get_check_fn_code() == other._get_check_fn_code()402        )403        try:404            are_strategy_fn_objects_equal = all(405                getattr(self.__dict__.get("strategy"), attr)406                == getattr(other.__dict__.get("strategy"), attr)407                for attr in ["func", "args", "keywords"]408            )409        except AttributeError:410            are_strategy_fn_objects_equal = True411        are_all_other_check_attributes_equal = {412            k: v413   
         for k, v in self.__dict__.items()414            if k not in ["_check_fn", "strategy"]415        } == {416            k: v417            for k, v in other.__dict__.items()418            if k not in ["_check_fn", "strategy"]419        }420        return (421            are_check_fn_objects_equal422            and are_strategy_fn_objects_equal423            and are_all_other_check_attributes_equal424        )425    def _get_check_fn_code(self):426        check_fn = self.__dict__["_check_fn"]427        try:428            code = check_fn.__code__.co_code429        except AttributeError:430            # try accessing the functools.partial wrapper431            code = check_fn.func.__code__.co_code432        return code433    def __hash__(self) -> int:434        return hash(self._get_check_fn_code())435    def __repr__(self) -> str:436        return (437            f"<Check {self.name}: {self.error}>"438            if self.error is not None439            else f"<Check {self.name}>"440        )441class Check(_CheckBase):442    """Check a pandas Series or DataFrame for certain properties."""443    @classmethod444    @st.register_check_strategy(st.eq_strategy)445    @register_check_statistics(["value"])446    def equal_to(cls, value, **kwargs) -> "Check":447        """Ensure all elements of a series equal a certain value.448        *New in version 0.4.5*449        Alias: ``eq``450        :param value: All elements of a given :class:`pandas.Series` must have451            this value452        :param kwargs: key-word arguments passed into the `Check` initializer.453        :returns: :class:`Check` object454        """455        def _equal(series: pd.Series) -> pd.Series:456            """Comparison function for check"""457            return series == value458        return cls(459            _equal,460            name=cls.equal_to.__name__,461            error=f"equal_to({value})",462            **kwargs,463        )464    eq = equal_to465    @classmethod466    
@st.register_check_strategy(st.ne_strategy)467    @register_check_statistics(["value"])468    def not_equal_to(cls, value, **kwargs) -> "Check":469        """Ensure no elements of a series equals a certain value.470        *New in version 0.4.5*471        Alias: ``ne``472        :param value: This value must not occur in the checked473            :class:`pandas.Series`.474        :param kwargs: key-word arguments passed into the `Check` initializer.475        :returns: :class:`Check` object476        """477        def _not_equal(series: pd.Series) -> pd.Series:478            """Comparison function for check"""479            return series != value480        return cls(481            _not_equal,482            name=cls.not_equal_to.__name__,483            error=f"not_equal_to({value})",484            **kwargs,485        )486    ne = not_equal_to487    @classmethod488    @st.register_check_strategy(st.gt_strategy)489    @register_check_statistics(["min_value"])490    def greater_than(cls, min_value, **kwargs) -> "Check":491        """Ensure values of a series are strictly greater than a minimum value.492        *New in version 0.4.5*493        Alias: ``gt``494        :param min_value: Lower bound to be exceeded. Must be a type comparable495            to the dtype of the :class:`pandas.Series` to be validated (e.g. 
a496            numerical type for float or int and a datetime for datetime).497        :param kwargs: key-word arguments passed into the `Check` initializer.498        :returns: :class:`Check` object499        """500        if min_value is None:501            raise ValueError("min_value must not be None")502        def _greater_than(series: pd.Series) -> pd.Series:503            """Comparison function for check"""504            return series > min_value505        return cls(506            _greater_than,507            name=cls.greater_than.__name__,508            error=f"greater_than({min_value})",509            **kwargs,510        )511    gt = greater_than512    @classmethod513    @st.register_check_strategy(st.ge_strategy)514    @register_check_statistics(["min_value"])515    def greater_than_or_equal_to(cls, min_value, **kwargs) -> "Check":516        """Ensure all values are greater or equal a certain value.517        *New in version 0.4.5*518        Alias: ``ge``519        :param min_value: Allowed minimum value for values of a series. 
Must be520            a type comparable to the dtype of the :class:`pandas.Series` to be521            validated.522        :param kwargs: key-word arguments passed into the `Check` initializer.523        :returns: :class:`Check` object524        """525        if min_value is None:526            raise ValueError("min_value must not be None")527        def _greater_or_equal(series: pd.Series) -> pd.Series:528            """Comparison function for check"""529            return series >= min_value530        return cls(531            _greater_or_equal,532            name=cls.greater_than_or_equal_to.__name__,533            error=f"greater_than_or_equal_to({min_value})",534            **kwargs,535        )536    ge = greater_than_or_equal_to537    @classmethod538    @st.register_check_strategy(st.lt_strategy)539    @register_check_statistics(["max_value"])540    def less_than(cls, max_value, **kwargs) -> "Check":541        """Ensure values of a series are strictly below a maximum value.542        *New in version 0.4.5*543        Alias: ``lt``544        :param max_value: All elements of a series must be strictly smaller545            than this. 
Must be a type comparable to the dtype of the546            :class:`pandas.Series` to be validated.547        :param kwargs: key-word arguments passed into the `Check` initializer.548        :returns: :class:`Check` object549        """550        if max_value is None:551            raise ValueError("max_value must not be None")552        def _less_than(series: pd.Series) -> pd.Series:553            """Comparison function for check"""554            return series < max_value555        return cls(556            _less_than,557            name=cls.less_than.__name__,558            error=f"less_than({max_value})",559            **kwargs,560        )561    lt = less_than562    @classmethod563    @st.register_check_strategy(st.le_strategy)564    @register_check_statistics(["max_value"])565    def less_than_or_equal_to(cls, max_value, **kwargs) -> "Check":566        """Ensure values are less than or equal to a maximum value.567        *New in version 0.4.5*568        Alias: ``le``569        :param max_value: Upper bound not to be exceeded. 
Must be a type570            comparable to the dtype of the :class:`pandas.Series` to be571            validated.572        :param kwargs: key-word arguments passed into the `Check` initializer.573        :returns: :class:`Check` object574        """575        if max_value is None:576            raise ValueError("max_value must not be None")577        def _less_or_equal(series: pd.Series) -> pd.Series:578            """Comparison function for check"""579            return series <= max_value580        return cls(581            _less_or_equal,582            name=cls.less_than_or_equal_to.__name__,583            error=f"less_than_or_equal_to({max_value})",584            **kwargs,585        )586    le = less_than_or_equal_to587    @classmethod588    @st.register_check_strategy(st.in_range_strategy)589    @register_check_statistics(590        ["min_value", "max_value", "include_min", "include_max"]591    )592    def in_range(593        cls, min_value, max_value, include_min=True, include_max=True, **kwargs594    ) -> "Check":595        """Ensure all values of a series are within an interval.596        :param min_value: Left / lower endpoint of the interval.597        :param max_value: Right / upper endpoint of the interval. 
Must not be598            smaller than min_value.599        :param include_min: Defines whether min_value is also an allowed value600            (the default) or whether all values must be strictly greater than601            min_value.602        :param include_max: Defines whether min_value is also an allowed value603            (the default) or whether all values must be strictly smaller than604            max_value.605        :param kwargs: key-word arguments passed into the `Check` initializer.606        Both endpoints must be a type comparable to the dtype of the607        :class:`pandas.Series` to be validated.608        :returns: :class:`Check` object609        """610        if min_value is None:611            raise ValueError("min_value must not be None")612        if max_value is None:613            raise ValueError("max_value must not be None")614        if max_value < min_value or (615            min_value == max_value and (not include_min or not include_max)616        ):617            raise ValueError(618                f"The combination of min_value = {min_value} and max_value = {max_value} "619                "defines an empty interval!"620            )621        # Using functions from operator module to keep conditions out of the622        # closure623        left_op = operator.le if include_min else operator.lt624        right_op = operator.ge if include_max else operator.gt625        def _in_range(series: pd.Series) -> pd.Series:626            """Comparison function for check"""627            return left_op(min_value, series) & right_op(max_value, series)628        return cls(629            _in_range,630            name=cls.in_range.__name__,631            error=f"in_range({min_value}, {max_value})",632            **kwargs,633        )634    @classmethod635    @st.register_check_strategy(st.isin_strategy)636    @register_check_statistics(["allowed_values"])637    def isin(cls, allowed_values: Iterable, **kwargs) -> "Check":638        """Ensure only 
allowed values occur within a series.639        :param allowed_values: The set of allowed values. May be any iterable.640        :param kwargs: key-word arguments passed into the `Check` initializer.641        :returns: :class:`Check` object642        .. note::643            It is checked whether all elements of a :class:`pandas.Series`644            are part of the set of elements of allowed values. If allowed645            values is a string, the set of elements consists of all distinct646            characters of the string. Thus only single characters which occur647            in allowed_values at least once can meet this condition. If you648            want to check for substrings use :func:`Check.str_is_substring`.649        """650        # Turn allowed_values into a set. Not only for performance but also651        # avoid issues with a mutable argument passed by reference which may be652        # changed from outside.653        try:654            allowed_values = frozenset(allowed_values)655        except TypeError as exc:656            raise ValueError(657                f"Argument allowed_values must be iterable. Got {allowed_values}"658            ) from exc659        def _isin(series: pd.Series) -> pd.Series:660            """Comparison function for check"""661            return series.isin(allowed_values)662        return cls(663            _isin,664            name=cls.isin.__name__,665            error=f"isin({set(allowed_values)})",666            **kwargs,667        )668    @classmethod669    @st.register_check_strategy(st.notin_strategy)670    @register_check_statistics(["forbidden_values"])671    def notin(cls, forbidden_values: Iterable, **kwargs) -> "Check":672        """Ensure some defined values don't occur within a series.673        :param forbidden_values: The set of values which should not occur. 
May674            be any iterable.675        :param raise_warning: if True, check raises UserWarning instead of676            SchemaError on validation.677        :returns: :class:`Check` object678        .. note::679            Like :func:`Check.isin` this check operates on single characters if680            it is applied on strings. A string as paraforbidden_valuesmeter681            forbidden_values is understood as set of prohibited characters. Any682            string of length > 1 can't be in it by design.683        """684        # Turn forbidden_values into a set. Not only for performance but also685        # avoid issues with a mutable argument passed by reference which may be686        # changed from outside.687        try:688            forbidden_values = frozenset(forbidden_values)689        except TypeError as exc:690            raise ValueError(691                f"Argument forbidden_values must be iterable. Got {forbidden_values}"692            ) from exc693        def _notin(series: pd.Series) -> pd.Series:694            """Comparison function for check"""695            return ~series.isin(forbidden_values)696        return cls(697            _notin,698            name=cls.notin.__name__,699            error=f"notin({set(forbidden_values)})",700            **kwargs,701        )702    @classmethod703    @st.register_check_strategy(st.str_matches_strategy)704    @register_check_statistics(["pattern"])705    def str_matches(cls, pattern: str, **kwargs) -> "Check":706        """Ensure that string values match a regular expression.707        :param pattern: Regular expression pattern to use for matching708        :param kwargs: key-word arguments passed into the `Check` initializer.709        :returns: :class:`Check` object710        The behaviour is as of :func:`pandas.Series.str.match`.711        """712        # By compiling the regex we get the benefit of an early argument check713        try:714            regex = re.compile(pattern)715        except 
TypeError as exc:716            raise ValueError(717                f'pattern="{pattern}" cannot be compiled as regular expression'718            ) from exc719        def _match(series: pd.Series) -> pd.Series:720            """721            Check if all strings in the series match the regular expression.722            """723            return series.str.match(regex, na=False)724        return cls(725            _match,726            name=cls.str_matches.__name__,727            error=f"str_matches({regex})",728            **kwargs,729        )730    @classmethod731    @st.register_check_strategy(st.str_contains_strategy)732    @register_check_statistics(["pattern"])733    def str_contains(cls, pattern: str, **kwargs) -> "Check":734        """Ensure that a pattern can be found within each row.735        :param pattern: Regular expression pattern to use for searching736        :param kwargs: key-word arguments passed into the `Check` initializer.737        :returns: :class:`Check` object738        The behaviour is as of :func:`pandas.Series.str.contains`.739        """740        # By compiling the regex we get the benefit of an early argument check741        try:742            regex = re.compile(pattern)743        except TypeError as exc:744            raise ValueError(745                f'pattern="{pattern}" cannot be compiled as regular expression'746            ) from exc747        def _contains(series: pd.Series) -> pd.Series:748            """Check if a regex search is successful within each value"""749            return series.str.contains(regex, na=False)750        return cls(751            _contains,752            name=cls.str_contains.__name__,753            error=f"str_contains({regex})",754            **kwargs,755        )756    @classmethod757    @st.register_check_strategy(st.str_startswith_strategy)758    @register_check_statistics(["string"])759    def str_startswith(cls, string: str, **kwargs) -> "Check":760        """Ensure that all values start 
with a certain string.761        :param string: String all values should start with762        :param kwargs: key-word arguments passed into the `Check` initializer.763        :returns: :class:`Check` object764        """765        def _startswith(series: pd.Series) -> pd.Series:766            """Returns true only for strings starting with string"""767            return series.str.startswith(string, na=False)768        return cls(769            _startswith,770            name=cls.str_startswith.__name__,771            error=f"str_startswith({string})",772            **kwargs,773        )774    @classmethod775    @st.register_check_strategy(st.str_endswith_strategy)776    @register_check_statistics(["string"])777    def str_endswith(cls, string: str, **kwargs) -> "Check":778        """Ensure that all values end with a certain string.779        :param string: String all values should end with780        :param kwargs: key-word arguments passed into the `Check` initializer.781        :returns: :class:`Check` object782        """783        def _endswith(series: pd.Series) -> pd.Series:784            """Returns true only for strings ending with string"""785            return series.str.endswith(string, na=False)786        return cls(787            _endswith,788            name=cls.str_endswith.__name__,789            error=f"str_endswith({string})",790            **kwargs,791        )792    @classmethod793    @st.register_check_strategy(st.str_length_strategy)794    @register_check_statistics(["min_value", "max_value"])795    def str_length(796        cls, min_value: int = None, max_value: int = None, **kwargs797    ) -> "Check":798        """Ensure that the length of strings is within a specified range.799        :param min_value: Minimum length of strings (default: no minimum)800        :param max_value: Maximum length of strings (default: no maximum)801        :param kwargs: key-word arguments passed into the `Check` initializer.802        :returns: :class:`Check` 
object803        """804        if min_value is None and max_value is None:805            raise ValueError(806                "At least a minimum or a maximum need to be specified. Got "807                "None."808            )809        if max_value is None:810            def _str_length(series: pd.Series) -> pd.Series:811                """Check for the minimum string length"""812                return series.str.len() >= min_value813        elif min_value is None:814            def _str_length(series: pd.Series) -> pd.Series:815                """Check for the maximum string length"""816                return series.str.len() <= max_value817        else:818            def _str_length(series: pd.Series) -> pd.Series:819                """Check for both, minimum and maximum string length"""820                return (series.str.len() <= max_value) & (821                    series.str.len() >= min_value822                )823        return cls(824            _str_length,825            name=cls.str_length.__name__,826            error=f"str_length({min_value}, {max_value})",827            **kwargs,...test_strategies.py
Source:test_strategies.py  
...199@hypothesis.given(st.data())200@hypothesis.settings(201    suppress_health_check=[hypothesis.HealthCheck.too_slow],202)203def test_in_range_strategy(data_type, chained, data):204    """Test the built-in in-range strategy can correctly generate data."""205    min_value, max_value = data.draw(value_ranges(data_type))206    hypothesis.assume(min_value < max_value)207    base_st_in_range = None208    if chained:209        if is_float(data_type):210            base_st_kwargs = {211                "exclude_min": False,212                "exclude_max": False,213            }214        else:215            base_st_kwargs = {}216        # constraining the strategy this way makes testing more efficient217        base_st_in_range = strategies.pandas_dtype_strategy(218            data_type,219            min_value=min_value,220            max_value=max_value,221            **base_st_kwargs,  # type: ignore[arg-type]222        )223    strat = strategies.in_range_strategy(224        data_type,225        base_st_in_range,226        min_value=min_value,227        max_value=max_value,228    )229    assert min_value <= data.draw(strat) <= max_value230@pytest.mark.parametrize(231    "data_type",232    [data_type for data_type in SUPPORTED_DTYPES if data_type.continuous],233)234@pytest.mark.parametrize("chained", [True, False])235@hypothesis.given(st.data())236@hypothesis.settings(237    suppress_health_check=[hypothesis.HealthCheck.too_slow],...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
