How to use pandas_dtype_strategy method in pandera

Best Python code snippet using pandera_python

strategies.py

Source:strategies.py Github

copy

Full Screen

# ... (excerpt begins mid-file; the commented lines below are the tail of a
# function whose beginning is not shown -- presumably ``to_numpy_dtype``,
# TODO confirm against the full source)
#         ) from err
#     if np_dtype == np.dtype("object") or str(pandera_dtype) == "str":
#         np_dtype = np.dtype(str)
#     return np_dtype


def pandas_dtype_strategy(
    pandera_dtype: DataType,
    strategy: Optional[SearchStrategy] = None,
    **kwargs,
) -> SearchStrategy:
    # pylint: disable=line-too-long,no-else-raise
    """Strategy to generate data from a :class:`pandera.dtypes.DataType`.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :kwargs: key-word arguments passed into
        `hypothesis.extra.numpy.from_dtype <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.numpy.from_dtype>`_ .
        For datetime, timedelta, and complex number datatypes, these arguments
        are passed into :func:`~pandera.strategies.numpy_time_dtypes` and
        :func:`~pandera.strategies.numpy_complex_dtypes`.
    :returns: ``hypothesis`` strategy
    :raises TypeError: if ``pandera_dtype`` is a categorical dtype.
    """

    def compat_kwargs(*args):
        # keep only the keyword arguments that the target strategy accepts
        return {k: v for k, v in kwargs.items() if k in args}

    # hypothesis doesn't support categoricals or objects, so we'll need to
    # build a pandera-specific solution.
    if is_category(pandera_dtype):
        raise TypeError(
            "data generation for the Category dtype is currently "
            "unsupported. Consider using a string or int dtype and "
            "Check.isin(values) to ensure a finite set of values."
        )

    np_dtype = to_numpy_dtype(pandera_dtype)
    if strategy:
        # chained case: coerce values drawn from the parent strategy
        if _is_datetime_tz(pandera_dtype):
            return _datetime_strategy(pandera_dtype.type, strategy)  # type: ignore
        return strategy.map(np_dtype.type)
    elif is_datetime(pandera_dtype) or is_timedelta(pandera_dtype):
        return numpy_time_dtypes(
            pandera_dtype.type if _is_datetime_tz(pandera_dtype) else np_dtype,  # type: ignore
            **compat_kwargs("min_value", "max_value"),
        )
    elif is_complex(pandera_dtype):
        return numpy_complex_dtypes(
            np_dtype,
            **compat_kwargs(
                "min_value", "max_value", "allow_infinity", "allow_nan"
            ),
        )
    return npst.from_dtype(
        np_dtype,
        **{  # type: ignore
            # defaults first so that caller-supplied kwargs override them
            "allow_nan": False,
            "allow_infinity": False,
            **kwargs,
        },
    )


def eq_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    value: Any,
) -> SearchStrategy:
    """Strategy to generate a single value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. It is accepted for
        signature compatibility but ignored: the returned strategy always
        yields ``value``.
    :param value: value to generate.
    :returns: ``hypothesis`` strategy
    """
    # override strategy preceding this one and generate value of the same type
    # pylint: disable=unused-argument
    return pandas_dtype_strategy(pandera_dtype, st.just(value))


def ne_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    value: Any,
) -> SearchStrategy:
    """Strategy to generate anything except for a particular value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param value: value to avoid.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(pandera_dtype)
    return strategy.filter(lambda x: x != value)


def gt_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values greater than a minimum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: generate values larger than this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(
            pandera_dtype,
            min_value=min_value,
            exclude_min=True if is_float(pandera_dtype) else None,
        )
    # the filter is applied in both branches so the bound stays strict even
    # when ``exclude_min`` is not set (non-float dtypes)
    return strategy.filter(lambda x: x > min_value)


def ge_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values greater than or equal to a minimum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: generate values greater than or equal to this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype,
            min_value=min_value,
            exclude_min=False if is_float(pandera_dtype) else None,
        )
    return strategy.filter(lambda x: x >= min_value)


def lt_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    max_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values less than a maximum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param max_value: generate values less than this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(
            pandera_dtype,
            max_value=max_value,
            exclude_max=True if is_float(pandera_dtype) else None,
        )
    # the filter is applied in both branches so the bound stays strict even
    # when ``exclude_max`` is not set (non-float dtypes)
    return strategy.filter(lambda x: x < max_value)


def le_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    max_value: Union[int, float],
) -> SearchStrategy:
    """Strategy to generate values less than or equal to a maximum value.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param max_value: generate values less than or equal to this.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype,
            max_value=max_value,
            exclude_max=False if is_float(pandera_dtype) else None,
        )
    return strategy.filter(lambda x: x <= max_value)


def in_range_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: Union[int, float],
    max_value: Union[int, float],
    include_min: bool = True,
    include_max: bool = True,
) -> SearchStrategy:
    """Strategy to generate values within a particular range.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: lower bound of the generated values.
    :param max_value: upper bound of the generated values.
    :param include_min: include min_value in generated data.
    :param include_max: include max_value in generated data.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype,
            min_value=min_value,
            max_value=max_value,
            exclude_min=not include_min,
            exclude_max=not include_max,
        )
    # pick inclusive/exclusive comparison operators for each bound
    min_op = operator.ge if include_min else operator.gt
    max_op = operator.le if include_max else operator.lt
    return strategy.filter(
        lambda x: min_op(x, min_value) and max_op(x, max_value)
    )


def isin_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    allowed_values: Sequence[Any],
) -> SearchStrategy:
    """Strategy to generate values within a finite set.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param allowed_values: set of allowable values.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return pandas_dtype_strategy(
            pandera_dtype, st.sampled_from(allowed_values)
        )
    return strategy.filter(lambda x: x in allowed_values)


def notin_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    forbidden_values: Sequence[Any],
) -> SearchStrategy:
    """Strategy to generate values excluding a set of forbidden values.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param forbidden_values: set of forbidden values.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        strategy = pandas_dtype_strategy(pandera_dtype)
    return strategy.filter(lambda x: x not in forbidden_values)


def str_matches_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    pattern: str,
) -> SearchStrategy:
    """Strategy to generate strings that match a regex pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param pattern: regex pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return st.from_regex(pattern, fullmatch=True).map(
            to_numpy_dtype(pandera_dtype).type
        )

    def matches(x):
        # NOTE: ``re.match`` anchors only at the start of the string
        return re.match(pattern, x)

    return strategy.filter(matches)


def str_contains_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    pattern: str,
) -> SearchStrategy:
    """Strategy to generate strings that contain a particular pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param pattern: regex pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return st.from_regex(pattern, fullmatch=False).map(
            to_numpy_dtype(pandera_dtype).type
        )

    def contains(x):
        return re.search(pattern, x)

    return strategy.filter(contains)


def str_startswith_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    string: str,
) -> SearchStrategy:
    """Strategy to generate strings that start with a specific string pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param string: string pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        # NOTE(review): ``string`` is interpolated into the regex unescaped,
        # so regex metacharacters in it are interpreted as regex syntax.
        return st.from_regex(f"\\A{string}", fullmatch=False).map(
            to_numpy_dtype(pandera_dtype).type
        )
    return strategy.filter(lambda x: x.startswith(string))


def str_endswith_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    string: str,
) -> SearchStrategy:
    """Strategy to generate strings that end with a specific string pattern.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param string: string pattern.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        # NOTE(review): ``string`` is interpolated into the regex unescaped,
        # so regex metacharacters in it are interpreted as regex syntax.
        return st.from_regex(f"{string}\\Z", fullmatch=False).map(
            to_numpy_dtype(pandera_dtype).type
        )
    return strategy.filter(lambda x: x.endswith(string))


def str_length_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    min_value: int,
    max_value: int,
) -> SearchStrategy:
    """Strategy to generate strings of a particular length.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param min_value: minimum string length.
    :param max_value: maximum string length.
    :returns: ``hypothesis`` strategy
    """
    if strategy is None:
        return st.text(min_size=min_value, max_size=max_value).map(
            to_numpy_dtype(pandera_dtype).type
        )
    return strategy.filter(lambda x: min_value <= len(x) <= max_value)


def _timestamp_to_datetime64_strategy(
    strategy: SearchStrategy,
) -> SearchStrategy:
    """Convert timestamp to numpy.datetime64.

    Hypothesis only supports pure numpy dtypes but numpy.datetime64() truncates
    nanoseconds if given a pandas.Timestamp. We need to pass the unix epoch via
    the pandas.Timestamp.value attribute.

    :param strategy: strategy producing objects that expose a ``value``
        attribute.
    :returns: ``hypothesis`` strategy producing ``numpy.datetime64`` values
        with nanosecond resolution.
    """
    return st.builds(lambda x: np.datetime64(x.value, "ns"), strategy)


def field_element_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
) -> SearchStrategy:
    """Strategy to generate elements of a column or index.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: must be left unset; this is a base strategy and cannot
        be chained onto a parent strategy.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :returns: ``hypothesis`` strategy
    :raises BaseStrategyOnlyError: if ``strategy`` is specified.
    """
    if strategy:
        raise BaseStrategyOnlyError(
            "The series strategy is a base strategy. You cannot specify the "
            "strategy argument to chain it to a parent strategy."
        )
    checks = [] if checks is None else checks
    elements = None

    def undefined_check_strategy(elements, check):
        """Strategy for checks with undefined strategies."""
        warnings.warn(
            "Element-wise check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data-generation."
        )
        return (
            pandas_dtype_strategy(pandera_dtype)
            if elements is None
            else elements
        ).filter(check._check_fn)

    # chain each check's strategy onto the one built so far
    for check in checks:
        if hasattr(check, "strategy"):
            elements = check.strategy(pandera_dtype, elements)
        elif check.element_wise:
            elements = undefined_check_strategy(elements, check)
        # NOTE: vectorized checks with undefined strategies should be handled
        # by the series/dataframe strategy.
    if elements is None:
        elements = pandas_dtype_strategy(pandera_dtype)

    # Hypothesis only supports pure numpy datetime64 (i.e. timezone naive).
    # We cast to datetime64 after applying the check strategy so that checks
    # can see timezone-aware values.
    if _is_datetime_tz(pandera_dtype):
        elements = _timestamp_to_datetime64_strategy(elements)

    return elements


def series_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    nullable: bool = False,
    unique: bool = False,
    name: Optional[str] = None,
    size: Optional[int] = None,
):
    """Strategy to generate a pandas Series.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. It is forwarded to
        :func:`field_element_strategy`, which rejects it if specified.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param nullable: whether or not generated Series contains null values.
    :param unique: whether or not generated Series contains unique values.
    :param name: name of the Series.
    :param size: number of elements in the Series.
    :returns: ``hypothesis`` strategy.
    """
    elements = field_element_strategy(pandera_dtype, strategy, checks=checks)
    strategy = (
        pdst.series(
            elements=elements,
            dtype=to_numpy_dtype(pandera_dtype),
            index=pdst.range_indexes(
                min_size=0 if size is None else size, max_size=size
            ),
            unique=unique,
        )
        .filter(lambda x: x.shape[0] > 0)  # discard empty series
        .map(lambda x: x.rename(name))
        .map(lambda x: x.astype(pandera_dtype.type))
    )
    if nullable:
        strategy = null_field_masks(strategy)

    def undefined_check_strategy(strategy, check):
        """Strategy for checks with undefined strategies."""
        warnings.warn(
            "Vectorized check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data-generation."
        )

        def _check_fn(series):
            return check(series).check_passed

        return strategy.filter(_check_fn)

    # element-wise checks were already handled by field_element_strategy;
    # only vectorized checks without a strategy are filtered here
    for check in checks if checks is not None else []:
        if not hasattr(check, "strategy") and not check.element_wise:
            strategy = undefined_check_strategy(strategy, check)

    return strategy


def column_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    unique: bool = False,
    name: Optional[str] = None,
):
    # pylint: disable=line-too-long
    """Create a data object describing a column in a DataFrame.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. It is forwarded to
        :func:`field_element_strategy`, which rejects it if specified.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param unique: whether or not generated Series contains unique values.
    :param name: name of the Series.
    :returns: a `column <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.pandas.column>`_ object.
    """
    verify_dtype(pandera_dtype, schema_type="column", name=name)
    elements = field_element_strategy(pandera_dtype, strategy, checks=checks)
    return pdst.column(
        name=name,
        elements=elements,
        dtype=to_numpy_dtype(pandera_dtype),
        unique=unique,
    )


def index_strategy(
    pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    nullable: bool = False,
    unique: bool = False,
    name: Optional[str] = None,
    size: Optional[int] = None,
):
    """Strategy to generate a pandas Index.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: an optional hypothesis strategy. It is forwarded to
        :func:`field_element_strategy`, which rejects it if specified.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param nullable: whether or not generated Index contains null values.
    :param unique: whether or not generated Index contains unique values.
    :param name: name of the Index.
    :param size: number of elements in the Index.
    :returns: ``hypothesis`` strategy.
    """
    verify_dtype(pandera_dtype, schema_type="index", name=name)
    elements = field_element_strategy(pandera_dtype, strategy, checks=checks)
    strategy = pdst.indexes(
        elements=elements,
        dtype=to_numpy_dtype(pandera_dtype),
        min_size=0 if size is None else size,
        max_size=size,
        unique=unique,
    ).map(lambda x: x.astype(pandera_dtype.type))

    # this is a hack to convert np.str_ data values into native python str.
    col_dtype = str(pandera_dtype)
    if col_dtype in {"object", "str"} or col_dtype.startswith("string"):
        # pylint: disable=cell-var-from-loop,undefined-loop-variable
        strategy = strategy.map(lambda index: index.map(str))

    if name is not None:
        strategy = strategy.map(lambda index: index.rename(name))
    if nullable:
        strategy = null_field_masks(strategy)
    return strategy


def dataframe_strategy(
    pandera_dtype: Optional[DataType] = None,
    strategy: Optional[SearchStrategy] = None,
    *,
    columns: Optional[Dict] = None,
    checks: Optional[Sequence] = None,
    unique: Optional[List[str]] = None,
    index: Optional[IndexComponent] = None,
    size: Optional[int] = None,
    n_regex_columns: int = 1,
):
    """Strategy to generate a pandas DataFrame.

    :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.
    :param strategy: if specified, this will raise a BaseStrategyOnlyError,
        since it cannot be chained to a prior strategy.
    :param columns: a dictionary where keys are column names and values
        are :class:`~pandera.schema_components.Column` objects.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data at the dataframe level.
    :param unique: a list of column names that should be jointly unique.
    :param index: Index or MultiIndex schema component.
    :param size: number of elements in the Series.
    :param n_regex_columns: number of regex columns to generate.
    :returns: ``hypothesis`` strategy.
    :raises ValueError: if ``n_regex_columns`` is smaller than 1.
    :raises BaseStrategyOnlyError: if ``strategy`` is specified.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    if n_regex_columns < 1:
        raise ValueError(
            "`n_regex_columns` must be a positive integer, found: "
            f"{n_regex_columns}"
        )
    if strategy:
        raise BaseStrategyOnlyError(
            "The dataframe strategy is a base strategy. You cannot specify "
            "the strategy argument to chain it to a parent strategy."
        )

    columns = {} if columns is None else columns
    checks = [] if checks is None else checks

    def undefined_check_strategy(strategy, check, column=None):
        """Strategy for checks with undefined strategies."""

        def _element_wise_check_fn(element):
            return check._check_fn(element)

        def _column_check_fn(dataframe):
            return check(dataframe[column]).check_passed

        def _dataframe_check_fn(dataframe):
            return check(dataframe).check_passed

        # choose the filter function matching the level the check applies to
        if check.element_wise:
            check_fn = _element_wise_check_fn
            warning_type = "Element-wise"
        elif column is None:
            check_fn = _dataframe_check_fn
            warning_type = "Dataframe"
        else:
            check_fn = _column_check_fn
            warning_type = "Column"

        warnings.warn(
            f"{warning_type} check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data-generation."
        )

        return strategy.filter(check_fn)

    def make_row_strategy(col, checks):
        # chain every check onto the element strategy of a single column
        strategy = None
        for check in checks:
            if hasattr(check, "strategy"):
                strategy = check.strategy(col.dtype, strategy)
            else:
                strategy = undefined_check_strategy(
                    strategy=(
                        pandas_dtype_strategy(col.dtype)
                        if strategy is None
                        else strategy
                    ),
                    check=check,
                )
        if strategy is None:
            strategy = pandas_dtype_strategy(col.dtype)
        return strategy

    @composite
    def _dataframe_strategy(draw):
        row_strategy_checks = []
        undefined_strat_df_checks = []
        for check in checks:
            if hasattr(check, "strategy") or check.element_wise:
                # we can apply element-wise checks defined at the dataframe
                # level to the row strategy
                row_strategy_checks.append(check)
            else:
                undefined_strat_df_checks.append(check)

        # expand column set to generate column names for columns where
        # regex=True.
        # ... (excerpt truncated here; the remainder of ``dataframe_strategy``
        # is not shown on this page)

Full Screen

Full Screen

test_strategies.py

Source:test_strategies.py Github

copy

Full Screen

# ... (excerpt begins mid-file, inside the argument list of a decorator whose
# start is not shown -- presumably a ``@pytest.mark.parametrize`` supplying
# ``data_type`` to the test below, TODO confirm against the full source)
#             subtype=np.int64
#         ),
#     ],
# )
def test_unsupported_pandas_dtype_strategy(data_type):
    """Test unsupported pandas dtype strategy raises error."""
    with pytest.raises(TypeError, match=r"is currently unsupported"):
        strategies.pandas_dtype_strategy(data_type)


@pytest.mark.parametrize("data_type", SUPPORTED_DTYPES)
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[
        hypothesis.HealthCheck.too_slow,
        hypothesis.HealthCheck.data_too_large,
    ],
    max_examples=20,
)
def test_pandas_dtype_strategy(data_type, data):
    """Test that series can be constructed from pandas dtype."""

    strategy = strategies.pandas_dtype_strategy(data_type)
    example = data.draw(strategy)

    expected_type = strategies.to_numpy_dtype(data_type).type
    # pandas Timestamps are converted to numpy before comparing dtypes
    if isinstance(example, pd.Timestamp):
        example = example.to_numpy()
    assert example.dtype.type == expected_type

    # chaining the dtype strategy onto itself must preserve the dtype
    chained_strategy = strategies.pandas_dtype_strategy(data_type, strategy)
    chained_example = data.draw(chained_strategy)
    if isinstance(chained_example, pd.Timestamp):
        chained_example = chained_example.to_numpy()
    assert chained_example.dtype.type == expected_type


@pytest.mark.parametrize("data_type", NUMERIC_DTYPES)
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_check_strategy_continuous(data_type, data):
    """Test built-in check strategies can generate continuous data."""
    np_dtype = strategies.to_numpy_dtype(data_type)
    value = data.draw(
        npst.from_dtype(
            strategies.to_numpy_dtype(data_type),
            allow_nan=False,
            allow_infinity=False,
        )
    )
    # don't overstep bounds of representation
    hypothesis.assume(np.finfo(np_dtype).min < value < np.finfo(np_dtype).max)

    assert data.draw(strategies.ne_strategy(data_type, value=value)) != value
    assert data.draw(strategies.eq_strategy(data_type, value=value)) == value
    assert (
        data.draw(strategies.gt_strategy(data_type, min_value=value)) > value
    )
    assert (
        data.draw(strategies.ge_strategy(data_type, min_value=value)) >= value
    )
    assert (
        data.draw(strategies.lt_strategy(data_type, max_value=value)) < value
    )
    assert (
        data.draw(strategies.le_strategy(data_type, max_value=value)) <= value
    )


def value_ranges(data_type: pa.DataType):
    """Strategy to generate a sorted ``(low, high)`` pair with ``low < high``.

    :param data_type: pandera data type of the generated bounds.
    :returns: ``hypothesis`` strategy yielding a two-element sorted list.
    """
    kwargs = dict(
        allow_nan=False,
        allow_infinity=False,
        exclude_min=False,
        exclude_max=False,
    )
    return (
        st.tuples(
            strategies.pandas_dtype_strategy(
                data_type, strategy=None, **kwargs
            ),
            strategies.pandas_dtype_strategy(
                data_type, strategy=None, **kwargs
            ),
        )
        .map(sorted)
        .filter(lambda x: x[0] < x[1])
    )


@pytest.mark.parametrize("data_type", NUMERIC_DTYPES)
@pytest.mark.parametrize(
    "strat_fn, arg_name, base_st_type, compare_op",
    [
        [strategies.ne_strategy, "value", "type", operator.ne],
        [strategies.eq_strategy, "value", "just", operator.eq],
        [strategies.gt_strategy, "min_value", "limit", operator.gt],
        [strategies.ge_strategy, "min_value", "limit", operator.ge],
        [strategies.lt_strategy, "max_value", "limit", operator.lt],
        [strategies.le_strategy, "max_value", "limit", operator.le],
    ],
)
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_check_strategy_chained_continuous(
    data_type, strat_fn, arg_name, base_st_type, compare_op, data
):
    """
    Test built-in check strategies can generate continuous data building off
    of a parent strategy.
    """
    min_value, max_value = data.draw(value_ranges(data_type))
    hypothesis.assume(min_value < max_value)
    value = min_value
    base_st = strategies.pandas_dtype_strategy(
        data_type,
        min_value=min_value,
        max_value=max_value,
        allow_nan=False,
        allow_infinity=False,
    )
    if base_st_type == "type":
        assert_base_st = base_st
    elif base_st_type == "just":
        assert_base_st = st.just(value)
    elif base_st_type == "limit":
        assert_base_st = strategies.pandas_dtype_strategy(
            data_type,
            min_value=min_value,
            max_value=max_value,
            allow_nan=False,
            allow_infinity=False,
        )
    else:
        raise RuntimeError(f"base_st_type {base_st_type} not recognized")

    # look up the local variable (value / min_value / max_value) named by
    # the parametrized ``arg_name``
    local_vars = locals()
    assert_value = local_vars[arg_name]
    example = data.draw(
        strat_fn(data_type, assert_base_st, **{arg_name: assert_value})
    )
    assert compare_op(example, assert_value)


@pytest.mark.parametrize("data_type", NUMERIC_DTYPES)
@pytest.mark.parametrize("chained", [True, False])
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_in_range_strategy(data_type, chained, data):
    """Test the built-in in-range strategy can correctly generate data."""

    min_value, max_value = data.draw(value_ranges(data_type))
    hypothesis.assume(min_value < max_value)

    base_st_in_range = None
    if chained:
        if is_float(data_type):
            base_st_kwargs = {
                "exclude_min": False,
                "exclude_max": False,
            }
        else:
            base_st_kwargs = {}

        # constraining the strategy this way makes testing more efficient
        base_st_in_range = strategies.pandas_dtype_strategy(
            data_type,
            min_value=min_value,
            max_value=max_value,
            **base_st_kwargs,  # type: ignore[arg-type]
        )
    strat = strategies.in_range_strategy(
        data_type,
        base_st_in_range,
        min_value=min_value,
        max_value=max_value,
    )

    assert min_value <= data.draw(strat) <= max_value


@pytest.mark.parametrize(
    "data_type",
    [data_type for data_type in SUPPORTED_DTYPES if data_type.continuous],
)
@pytest.mark.parametrize("chained", [True, False])
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_isin_notin_strategies(data_type, chained, data):
    """Test built-in check strategies that rely on discrete values."""
    value_st = strategies.pandas_dtype_strategy(
        data_type,
        allow_nan=False,
        allow_infinity=False,
        exclude_min=False,
        exclude_max=False,
    )
    values = [data.draw(value_st) for _ in range(10)]

    isin_base_st = None
    notin_base_st = None
    if chained:
        # the parent strategies cover a superset of ``values``
        base_values = values + [data.draw(value_st) for _ in range(10)]
        isin_base_st = strategies.isin_strategy(
            data_type, allowed_values=base_values
        )
        notin_base_st = strategies.notin_strategy(
            data_type, forbidden_values=base_values
        )

    isin_st = strategies.isin_strategy(
        data_type, isin_base_st, allowed_values=values
    )
    notin_st = strategies.notin_strategy(
        data_type, notin_base_st, forbidden_values=values
    )
    assert data.draw(isin_st) in values
    assert data.draw(notin_st) not in values


@pytest.mark.parametrize(
    "str_strat, pattern_fn",
    [
        [
            strategies.str_matches_strategy,
            lambda patt: f"^{patt}$",
        ],
        [strategies.str_contains_strategy, None],
        [strategies.str_startswith_strategy, None],
        [strategies.str_endswith_strategy, None],
    ],
)
@pytest.mark.parametrize("chained", [True, False])
@hypothesis.given(st.data(), st.text())
def test_str_pattern_checks(
    str_strat: Callable,
    pattern_fn: Optional[Callable[..., str]],
    chained: bool,
    data,
    pattern,
) -> None:
    """Test built-in check strategies for string pattern checks."""
    # skip generated text that is not a valid regular expression
    try:
        re.compile(pattern)
        re_compiles = True
    except re.error:
        re_compiles = False
    hypothesis.assume(re_compiles)

    pattern = pattern if pattern_fn is None else pattern_fn(pattern)

    # the strategies under test take either ``pattern=`` or ``string=``;
    # try the former and fall back to the latter on TypeError
    base_st = None
    if chained:
        try:
            base_st = str_strat(pa.String, pattern=pattern)
        except TypeError:
            base_st = str_strat(pa.String, string=pattern)

    # named ``strat`` so the module-level ``st`` alias is not shadowed
    try:
        strat = str_strat(pa.String, base_st, pattern=pattern)
    except TypeError:
        strat = str_strat(pa.String, base_st, string=pattern)
    example = data.draw(strat)

    assert re.search(pattern, example)


@pytest.mark.parametrize("chained", [True, False])
@hypothesis.given(
    st.data(),
    (
        st.tuples(
            st.integers(min_value=0, max_value=100),
            st.integers(min_value=0, max_value=100),
        )
        .map(sorted)  # type: ignore[arg-type]
        .filter(lambda x: x[0] < x[1])  # type: ignore
    ),
)
@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow])
def test_str_length_checks(chained, data, value_range):
    """Test built-in check strategies for string length."""
    min_value, max_value = value_range
    base_st = None
    if chained:
        # parent strategy spans a slightly wider length range
        base_st = strategies.str_length_strategy(
            pa.String,
            min_value=max(0, min_value - 5),
            max_value=max_value + 5,
        )
    str_length_st = strategies.str_length_strategy(
        pa.String, base_st, min_value=min_value, max_value=max_value
    )
    example = data.draw(str_length_st)
    assert min_value <= len(example) <= max_value


@hypothesis.given(st.data())
def test_register_check_strategy(data) -> None:
    """Test registering check strategy on a custom check."""

    # pylint: disable=unused-argument
    def custom_eq_strategy(
        pandas_dtype: pa.DataType,
        strategy: st.SearchStrategy = None,
        *,
        value: Any,
    ):
        return st.just(value).map(strategies.to_numpy_dtype(pandas_dtype).type)

    # pylint: disable=no-member
    class CustomCheck(_CheckBase):
        """Custom check class."""

        @classmethod
        @strategies.register_check_strategy(custom_eq_strategy)
        @register_check_statistics(["value"])
        def custom_equals(cls, value, **kwargs) -> "CustomCheck":
            """Define a built-in check."""

            def _custom_equals(series: pd.Series) -> pd.Series:
                """Comparison function for check"""
                return series == value

            return cls(
                _custom_equals,
                name=cls.custom_equals.__name__,
                error=f"equal_to({value})",
                **kwargs,
            )

    check = CustomCheck.custom_equals(100)
    result = data.draw(check.strategy(pa.Int()))
    assert result == 100


def test_register_check_strategy_exception() -> None:
    """Check method needs statistics attr to register a strategy."""

    def custom_strat() -> None:
        pass

    class CustomCheck(_CheckBase):
        """Custom check class."""

        @classmethod
        @strategies.register_check_strategy(custom_strat)  # type: ignore[arg-type]
        # mypy correctly identifies the error
        def custom_check(cls, **kwargs) -> "CustomCheck":
            """Built-in check with no statistics."""

            def _custom_check(series: pd.Series) -> pd.Series:
                """Some check function."""
                return series

            return cls(
                _custom_check,
                name=cls.custom_check.__name__,
                **kwargs,
            )

    with pytest.raises(
        AttributeError,
        match="check object doesn't have a defined statistics property",
    ):
        CustomCheck.custom_check()


@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_series_strategy(data) -> None:
    """Test SeriesSchema strategy."""
    series_schema = pa.SeriesSchema(pa.Int(), pa.Check.gt(0))
    series_schema(data.draw(series_schema.strategy()))


def test_series_example() -> None:
    """Test SeriesSchema example method generate examples that pass."""
    series_schema = pa.SeriesSchema(pa.Int(), pa.Check.gt(0))
    for _ in range(10):
        series_schema(series_schema.example())


@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
def test_column_strategy(data) -> None:
    """Test Column schema strategy."""
    column_schema = pa.Column(pa.Int(), pa.Check.gt(0), name="column")
    column_schema(data.draw(column_schema.strategy()))


def test_column_example():
    """Test Column schema example method generate examples that pass."""
    column_schema = pa.Column(pa.Int(), pa.Check.gt(0), name="column")
    for _ in range(10):
        column_schema(column_schema.example())


# ... (excerpt truncated here, in the middle of the decorators of the next
# test; the decorated function is not shown on this page)
# @pytest.mark.parametrize("data_type", SUPPORTED_DTYPES)
# @pytest.mark.parametrize("size", [None, 0, 1, 3, ...
5])419@hypothesis.given(st.data())420@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow])421def test_dataframe_strategy(data_type, size, data):422 """Test DataFrameSchema strategy."""423 dataframe_schema = pa.DataFrameSchema(424 {f"{data_type}_col": pa.Column(data_type)}425 )426 df_sample = data.draw(dataframe_schema.strategy(size=size))427 if size == 0:428 assert df_sample.empty429 elif size is None:430 assert df_sample.empty or isinstance(431 dataframe_schema(df_sample), pd.DataFrame432 )433 else:434 assert isinstance(dataframe_schema(df_sample), pd.DataFrame)435 with pytest.raises(pa.errors.BaseStrategyOnlyError):436 strategies.dataframe_strategy(437 data_type, strategies.pandas_dtype_strategy(data_type)438 )439@hypothesis.given(st.data())440@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow])441def test_dataframe_example(data) -> None:442 """Test DataFrameSchema example method generate examples that pass."""443 schema = pa.DataFrameSchema({"column": pa.Column(int, pa.Check.gt(0))})444 df_sample = data.draw(schema.strategy(size=10))445 schema(df_sample)446@pytest.mark.parametrize("size", [3, 5, 10])447@hypothesis.given(st.data())448@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow])449def test_dataframe_unique(size, data) -> None:450 """Test that DataFrameSchemas with unique columns are actually unique."""451 schema = pa.DataFrameSchema(452 {453 "col1": pa.Column(int),454 "col2": pa.Column(float),455 "col3": pa.Column(str),456 "col4": pa.Column(int),457 },458 unique=["col1", "col2", "col3"],459 )460 df_sample = data.draw(schema.strategy(size=size))461 schema(df_sample)462@pytest.mark.parametrize(463 "regex",464 [465 "col_[0-9]{1,4}",466 "[a-zA-Z]+_foobar",467 "[a-z]+_[0-9]+_[a-z]+",468 ],469)470@hypothesis.given(st.data(), st.integers(min_value=-5, max_value=5))471@hypothesis.settings(472 suppress_health_check=[hypothesis.HealthCheck.too_slow],473)474def test_dataframe_with_regex(regex: 
str, data, n_regex_columns: int) -> None:475 """Test DataFrameSchema strategy with regex columns"""476 dataframe_schema = pa.DataFrameSchema({regex: pa.Column(int, regex=True)})477 if n_regex_columns < 1:478 with pytest.raises(ValueError):479 dataframe_schema.strategy(size=5, n_regex_columns=n_regex_columns)480 else:481 df = dataframe_schema(482 data.draw(483 dataframe_schema.strategy(484 size=5, n_regex_columns=n_regex_columns485 )486 )487 )488 assert df.shape[1] == n_regex_columns489@pytest.mark.parametrize("data_type", NUMERIC_DTYPES)490@hypothesis.settings(491 suppress_health_check=[hypothesis.HealthCheck.too_slow],492)493@hypothesis.given(st.data())494def test_dataframe_checks(data_type, data):495 """Test dataframe strategy with checks defined at the dataframe level."""496 min_value, max_value = data.draw(value_ranges(data_type))497 dataframe_schema = pa.DataFrameSchema(498 {f"{data_type}_col": pa.Column(data_type) for _ in range(5)},499 checks=pa.Check.in_range(min_value, max_value),500 )501 strat = dataframe_schema.strategy(size=5)502 example = data.draw(strat)503 dataframe_schema(example)504@pytest.mark.parametrize(505 "data_type", [pa.Int(), pa.Float, pa.String, pa.DateTime]506)507@hypothesis.given(st.data())508@hypothesis.settings(509 suppress_health_check=[hypothesis.HealthCheck.too_slow],510)511def test_dataframe_strategy_with_indexes(data_type, data):512 """Test dataframe strategy with index and multiindex components."""513 dataframe_schema_index = pa.DataFrameSchema(index=pa.Index(data_type))514 dataframe_schema_multiindex = pa.DataFrameSchema(515 index=pa.MultiIndex(516 [pa.Index(data_type, name=f"index{i}") for i in range(3)]517 )518 )519 dataframe_schema_index(data.draw(dataframe_schema_index.strategy(size=10)))520 dataframe_schema_multiindex(521 data.draw(dataframe_schema_multiindex.strategy(size=10))522 )523@hypothesis.given(st.data())524@hypothesis.settings(525 suppress_health_check=[hypothesis.HealthCheck.too_slow],526)527def 
test_index_strategy(data) -> None:528 """Test Index schema component strategy."""529 data_type = pa.Int()530 index_schema = pa.Index(data_type, unique=True, name="index")531 strat = index_schema.strategy(size=10)532 example = data.draw(strat)533 assert (~example.duplicated()).all()534 actual_data_type = pandas_engine.Engine.dtype(example.dtype)535 assert data_type.check(actual_data_type)536 index_schema(pd.DataFrame(index=example))537def test_index_example() -> None:538 """539 Test Index schema component example method generates examples that pass.540 """541 data_type = pa.Int()542 index_schema = pa.Index(data_type, unique=True)543 for _ in range(10):544 index_schema(pd.DataFrame(index=index_schema.example()))545@hypothesis.given(st.data())546@hypothesis.settings(547 suppress_health_check=[hypothesis.HealthCheck.too_slow],548)549def test_multiindex_strategy(data) -> None:550 """Test MultiIndex schema component strategy."""551 data_type = pa.Float()552 multiindex = pa.MultiIndex(553 indexes=[554 pa.Index(data_type, unique=True, name="level_0"),555 pa.Index(data_type, nullable=True),556 pa.Index(data_type),557 ]558 )559 strat = multiindex.strategy(size=10)560 example = data.draw(strat)561 for i in range(example.nlevels):562 actual_data_type = pandas_engine.Engine.dtype(563 example.get_level_values(i).dtype564 )565 assert data_type.check(actual_data_type)566 with pytest.raises(pa.errors.BaseStrategyOnlyError):567 strategies.multiindex_strategy(568 data_type, strategies.pandas_dtype_strategy(data_type)569 )570def test_multiindex_example() -> None:571 """572 Test MultiIndex schema component example method generates examples that573 pass.574 """575 data_type = pa.Float()576 multiindex = pa.MultiIndex(577 indexes=[578 pa.Index(data_type, unique=True, name="level_0"),579 pa.Index(data_type, nullable=True),580 pa.Index(data_type),581 ]582 )583 for _ in range(10):584 example = multiindex.example()585 
multiindex(pd.DataFrame(index=example))586@pytest.mark.parametrize("data_type", NULLABLE_DTYPES)587@hypothesis.given(st.data())588def test_field_element_strategy(data_type, data):589 """Test strategy for generating elements in columns/indexes."""590 strategy = strategies.field_element_strategy(data_type)591 element = data.draw(strategy)592 expected_type = strategies.to_numpy_dtype(data_type).type593 assert element.dtype.type == expected_type594 with pytest.raises(pa.errors.BaseStrategyOnlyError):595 strategies.field_element_strategy(596 data_type, strategies.pandas_dtype_strategy(data_type)597 )598@pytest.mark.parametrize("data_type", NULLABLE_DTYPES)599@pytest.mark.parametrize(600 "field_strategy",601 [strategies.index_strategy, strategies.series_strategy],602)603@pytest.mark.parametrize("nullable", [True, False])604@hypothesis.given(st.data())605@hypothesis.settings(606 suppress_health_check=[hypothesis.HealthCheck.too_slow],607)608def test_check_nullable_field_strategy(609 data_type, field_strategy, nullable, data610):...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!

Get 100 automation test minutes FREE!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful