How to use the null_dataframe_masks method in pandera

Best Python code snippet using pandera_python

strategies.py

Source: strategies.py (GitHub)

copy

Full Screen

...78 val = _mask(val, null_mask)79 return pd.Index(val)80 return _mask(val, null_mask)81@composite82def null_dataframe_masks(83 draw,84 strategy: Optional[SearchStrategy],85 nullable_columns: Dict[str, bool],86):87 """Strategy for masking a values in a pandas DataFrame.88 :param strategy: an optional hypothesis strategy. If specified, the89 pandas dtype strategy will be chained onto this strategy.90 :param nullable_columns: dictionary where keys are column names and91 values indicate whether that column is nullable.92 """93 val = draw(strategy)94 size = val.shape[0]95 columns_strat = []96 for name, nullable in nullable_columns.items():97 element_st = st.booleans() if nullable else st.just(False)98 columns_strat.append(99 pdst.column(100 name=name,101 elements=element_st,102 dtype=bool,103 fill=st.just(False),104 )105 )106 mask_st = pdst.data_frames(107 columns=columns_strat,108 index=pdst.range_indexes(min_size=size, max_size=size),109 )110 null_mask = draw(mask_st)111 for column in val:112 val[column] = _mask(val[column], null_mask[column])113 return val114@composite115def set_pandas_index(116 draw,117 df_or_series_strat: SearchStrategy,118 index: IndexComponent,119):120 """Sets Index or MultiIndex object to pandas Series or DataFrame."""121 df_or_series = draw(df_or_series_strat)122 df_or_series.index = draw(index.strategy(size=df_or_series.shape[0]))123 return df_or_series124def verify_dtype(125 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],126 schema_type: str,127 name: Optional[str],128):129 """Verify that pandera_dtype argument is not None."""130 if pandera_dtype is None:131 raise SchemaDefinitionError(132 f"'{schema_type}' schema with name '{name}' has no specified "133 "dtype. 
You need to specify one in order to synthesize "134 "data from a strategy."135 )136def strategy_import_error(fn: F) -> F:137 """Decorator to generate input error if dependency is missing."""138 @wraps(fn)139 def _wrapper(*args, **kwargs):140 if not HAS_HYPOTHESIS: # pragma: no cover141 raise ImportError(142 'Strategies for generating data requires "hypothesis" to be \n'143 "installed. You can install pandera together with the IO \n"144 "dependencies with:\n"145 "pip install pandera[strategies]"146 )147 return fn(*args, **kwargs)148 return cast(F, _wrapper)149def register_check_strategy(strategy_fn: StrategyFn):150 """Decorate a Check method with a strategy.151 This should be applied to a built-in :class:`~pandera.checks.Check` method.152 :param strategy_fn: add strategy to a check, using check statistics to153 generate a ``hypothesis`` strategy.154 """155 def register_check_strategy_decorator(class_method):156 """Decorator that wraps Check class method."""157 @wraps(class_method)158 def _wrapper(cls, *args, **kwargs):159 check = class_method(cls, *args, **kwargs)160 if check.statistics is None:161 raise AttributeError(162 "check object doesn't have a defined statistics property. 
"163 "Use the checks.register_check_statistics decorator to "164 f"specify the statistics for the {class_method.__name__} "165 "method."166 )167 strategy_kwargs = {168 arg: stat169 for arg, stat in check.statistics.items()170 if stat is not None171 }172 check.strategy = partial(strategy_fn, **strategy_kwargs)173 return check174 return _wrapper175 return register_check_strategy_decorator176# pylint: disable=line-too-long177# Values taken from178# https://hypothesis.readthedocs.io/en/latest/_modules/hypothesis/extra/numpy.html#from_dtype # noqa179MIN_DT_VALUE = -(2**63)180MAX_DT_VALUE = 2**63 - 1181def _is_datetime_tz(pandera_dtype: DataType) -> bool:182 native_type = getattr(pandera_dtype, "type", None)183 return isinstance(native_type, pd.DatetimeTZDtype)184def _datetime_strategy(185 dtype: Union[np.dtype, pd.DatetimeTZDtype], strategy186) -> SearchStrategy:187 if isinstance(dtype, pd.DatetimeTZDtype):188 def _to_datetime(value) -> pd.DatetimeTZDtype:189 if isinstance(value, pd.Timestamp):190 return value.tz_convert(tz=dtype.tz) # type: ignore[union-attr]191 return pd.Timestamp(value, unit=dtype.unit, tz=dtype.tz) # type: ignore[union-attr]192 return st.builds(_to_datetime, strategy)193 else:194 res = (195 st.just(dtype.str.split("[")[-1][:-1])196 if "[" in dtype.str197 else st.sampled_from(npst.TIME_RESOLUTIONS)198 )199 return st.builds(dtype.type, strategy, res)200def numpy_time_dtypes(201 dtype: Union[np.dtype, pd.DatetimeTZDtype], min_value=None, max_value=None202):203 """Create numpy strategy for datetime and timedelta data types.204 :param dtype: numpy datetime or timedelta datatype205 :param min_value: minimum value of the datatype to create206 :param max_value: maximum value of the datatype to create207 :returns: ``hypothesis`` strategy208 """209 def _to_unix(value: Any) -> int:210 if dtype.type is np.timedelta64:211 return pd.Timedelta(value).value212 return pd.Timestamp(value).value213 min_value = MIN_DT_VALUE if min_value is None else 
_to_unix(min_value)214 max_value = MAX_DT_VALUE if max_value is None else _to_unix(max_value)215 return _datetime_strategy(dtype, st.integers(min_value, max_value))216def numpy_complex_dtypes(217 dtype,218 min_value: complex = complex(0, 0),219 max_value: Optional[complex] = None,220 allow_infinity: bool = None,221 allow_nan: bool = None,222):223 """Create numpy strategy for complex numbers.224 :param dtype: numpy complex number datatype225 :param min_value: minimum value, must be complex number226 :param max_value: maximum value, must be complex number227 :returns: ``hypothesis`` strategy228 """229 max_real: Optional[float]230 max_imag: Optional[float]231 if max_value:232 max_real = max_value.real233 max_imag = max_value.imag234 else:235 max_real = max_imag = None236 if dtype.itemsize == 8:237 width = 32238 else:239 width = 64240 # switch min and max values for imaginary if min value > max value241 if max_imag is not None and min_value.imag > max_imag:242 min_imag = max_imag243 max_imag = min_value.imag244 else:245 min_imag = min_value.imag246 strategy = st.builds(247 complex,248 st.floats(249 min_value=min_value.real,250 max_value=max_real,251 width=width,252 allow_infinity=allow_infinity,253 allow_nan=allow_nan,254 ),255 st.floats(256 min_value=min_imag,257 max_value=max_imag,258 width=width,259 allow_infinity=allow_infinity,260 allow_nan=allow_nan,261 ),262 ).map(dtype.type)263 @st.composite264 def build_complex(draw):265 value = draw(strategy)266 hypothesis.assume(min_value <= value)267 if max_value is not None:268 hypothesis.assume(max_value >= value)269 return value270 return build_complex()271def to_numpy_dtype(pandera_dtype: DataType):272 """Convert a :class:`~pandera.dtypes.DataType` to numpy dtype compatible273 with hypothesis."""274 try:275 np_dtype = pandas_engine.Engine.numpy_dtype(pandera_dtype)276 except TypeError as err:277 if is_datetime(pandera_dtype):278 return np.dtype("datetime64[ns]")279 raise TypeError(280 f"Data generation for the 
'{pandera_dtype}' data type is "281 "currently unsupported."282 ) from err283 if np_dtype == np.dtype("object") or str(pandera_dtype) == "str":284 np_dtype = np.dtype(str)285 return np_dtype286def pandas_dtype_strategy(287 pandera_dtype: DataType,288 strategy: Optional[SearchStrategy] = None,289 **kwargs,290) -> SearchStrategy:291 # pylint: disable=line-too-long,no-else-raise292 """Strategy to generate data from a :class:`pandera.dtypes.DataType`.293 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.294 :param strategy: an optional hypothesis strategy. If specified, the295 pandas dtype strategy will be chained onto this strategy.296 :kwargs: key-word arguments passed into297 `hypothesis.extra.numpy.from_dtype <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.numpy.from_dtype>`_ .298 For datetime, timedelta, and complex number datatypes, these arguments299 are passed into :func:`~pandera.strategies.numpy_time_dtypes` and300 :func:`~pandera.strategies.numpy_complex_dtypes`.301 :returns: ``hypothesis`` strategy302 """303 def compat_kwargs(*args):304 return {k: v for k, v in kwargs.items() if k in args}305 # hypothesis doesn't support categoricals or objects, so we'll will need to306 # build a pandera-specific solution.307 if is_category(pandera_dtype):308 raise TypeError(309 "data generation for the Category dtype is currently "310 "unsupported. 
Consider using a string or int dtype and "311 "Check.isin(values) to ensure a finite set of values."312 )313 np_dtype = to_numpy_dtype(pandera_dtype)314 if strategy:315 if _is_datetime_tz(pandera_dtype):316 return _datetime_strategy(pandera_dtype.type, strategy) # type: ignore317 return strategy.map(np_dtype.type)318 elif is_datetime(pandera_dtype) or is_timedelta(pandera_dtype):319 return numpy_time_dtypes(320 pandera_dtype.type if _is_datetime_tz(pandera_dtype) else np_dtype, # type: ignore321 **compat_kwargs("min_value", "max_value"),322 )323 elif is_complex(pandera_dtype):324 return numpy_complex_dtypes(325 np_dtype,326 **compat_kwargs(327 "min_value", "max_value", "allow_infinity", "allow_nan"328 ),329 )330 return npst.from_dtype(331 np_dtype,332 **{ # type: ignore333 "allow_nan": False,334 "allow_infinity": False,335 **kwargs,336 },337 )338def eq_strategy(339 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],340 strategy: Optional[SearchStrategy] = None,341 *,342 value: Any,343) -> SearchStrategy:344 """Strategy to generate a single value.345 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.346 :param strategy: an optional hypothesis strategy. If specified, the347 pandas dtype strategy will be chained onto this strategy.348 :param value: value to generate.349 :returns: ``hypothesis`` strategy350 """351 # override strategy preceding this one and generate value of the same type352 # pylint: disable=unused-argument353 return pandas_dtype_strategy(pandera_dtype, st.just(value))354def ne_strategy(355 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],356 strategy: Optional[SearchStrategy] = None,357 *,358 value: Any,359) -> SearchStrategy:360 """Strategy to generate anything except for a particular value.361 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.362 :param strategy: an optional hypothesis strategy. 
If specified, the363 pandas dtype strategy will be chained onto this strategy.364 :param value: value to avoid.365 :returns: ``hypothesis`` strategy366 """367 if strategy is None:368 strategy = pandas_dtype_strategy(pandera_dtype)369 return strategy.filter(lambda x: x != value)370def gt_strategy(371 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],372 strategy: Optional[SearchStrategy] = None,373 *,374 min_value: Union[int, float],375) -> SearchStrategy:376 """Strategy to generate values greater than a minimum value.377 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.378 :param strategy: an optional hypothesis strategy. If specified, the379 pandas dtype strategy will be chained onto this strategy.380 :param min_value: generate values larger than this.381 :returns: ``hypothesis`` strategy382 """383 if strategy is None:384 strategy = pandas_dtype_strategy(385 pandera_dtype,386 min_value=min_value,387 exclude_min=True if is_float(pandera_dtype) else None,388 )389 return strategy.filter(lambda x: x > min_value)390def ge_strategy(391 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],392 strategy: Optional[SearchStrategy] = None,393 *,394 min_value: Union[int, float],395) -> SearchStrategy:396 """Strategy to generate values greater than or equal to a minimum value.397 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.398 :param strategy: an optional hypothesis strategy. 
If specified, the399 pandas dtype strategy will be chained onto this strategy.400 :param min_value: generate values greater than or equal to this.401 :returns: ``hypothesis`` strategy402 """403 if strategy is None:404 return pandas_dtype_strategy(405 pandera_dtype,406 min_value=min_value,407 exclude_min=False if is_float(pandera_dtype) else None,408 )409 return strategy.filter(lambda x: x >= min_value)410def lt_strategy(411 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],412 strategy: Optional[SearchStrategy] = None,413 *,414 max_value: Union[int, float],415) -> SearchStrategy:416 """Strategy to generate values less than a maximum value.417 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.418 :param strategy: an optional hypothesis strategy. If specified, the419 pandas dtype strategy will be chained onto this strategy.420 :param max_value: generate values less than this.421 :returns: ``hypothesis`` strategy422 """423 if strategy is None:424 strategy = pandas_dtype_strategy(425 pandera_dtype,426 max_value=max_value,427 exclude_max=True if is_float(pandera_dtype) else None,428 )429 return strategy.filter(lambda x: x < max_value)430def le_strategy(431 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],432 strategy: Optional[SearchStrategy] = None,433 *,434 max_value: Union[int, float],435) -> SearchStrategy:436 """Strategy to generate values less than or equal to a maximum value.437 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.438 :param strategy: an optional hypothesis strategy. 
If specified, the439 pandas dtype strategy will be chained onto this strategy.440 :param max_value: generate values less than or equal to this.441 :returns: ``hypothesis`` strategy442 """443 if strategy is None:444 return pandas_dtype_strategy(445 pandera_dtype,446 max_value=max_value,447 exclude_max=False if is_float(pandera_dtype) else None,448 )449 return strategy.filter(lambda x: x <= max_value)450def in_range_strategy(451 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],452 strategy: Optional[SearchStrategy] = None,453 *,454 min_value: Union[int, float],455 max_value: Union[int, float],456 include_min: bool = True,457 include_max: bool = True,458) -> SearchStrategy:459 """Strategy to generate values within a particular range.460 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.461 :param strategy: an optional hypothesis strategy. If specified, the462 pandas dtype strategy will be chained onto this strategy.463 :param min_value: generate values greater than this.464 :param max_value: generate values less than this.465 :param include_min: include min_value in generated data.466 :param include_max: include max_value in generated data.467 :returns: ``hypothesis`` strategy468 """469 if strategy is None:470 return pandas_dtype_strategy(471 pandera_dtype,472 min_value=min_value,473 max_value=max_value,474 exclude_min=not include_min,475 exclude_max=not include_max,476 )477 min_op = operator.ge if include_min else operator.gt478 max_op = operator.le if include_max else operator.lt479 return strategy.filter(480 lambda x: min_op(x, min_value) and max_op(x, max_value)481 )482def isin_strategy(483 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],484 strategy: Optional[SearchStrategy] = None,485 *,486 allowed_values: Sequence[Any],487) -> SearchStrategy:488 """Strategy to generate values within a finite set.489 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.490 :param strategy: an optional hypothesis 
strategy. If specified, the491 pandas dtype strategy will be chained onto this strategy.492 :param allowed_values: set of allowable values.493 :returns: ``hypothesis`` strategy494 """495 if strategy is None:496 return pandas_dtype_strategy(497 pandera_dtype, st.sampled_from(allowed_values)498 )499 return strategy.filter(lambda x: x in allowed_values)500def notin_strategy(501 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],502 strategy: Optional[SearchStrategy] = None,503 *,504 forbidden_values: Sequence[Any],505) -> SearchStrategy:506 """Strategy to generate values excluding a set of forbidden values507 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.508 :param strategy: an optional hypothesis strategy. If specified, the509 pandas dtype strategy will be chained onto this strategy.510 :param forbidden_values: set of forbidden values.511 :returns: ``hypothesis`` strategy512 """513 if strategy is None:514 strategy = pandas_dtype_strategy(pandera_dtype)515 return strategy.filter(lambda x: x not in forbidden_values)516def str_matches_strategy(517 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],518 strategy: Optional[SearchStrategy] = None,519 *,520 pattern: str,521) -> SearchStrategy:522 """Strategy to generate strings that patch a regex pattern.523 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.524 :param strategy: an optional hypothesis strategy. 
If specified, the525 pandas dtype strategy will be chained onto this strategy.526 :param pattern: regex pattern.527 :returns: ``hypothesis`` strategy528 """529 if strategy is None:530 return st.from_regex(pattern, fullmatch=True).map(531 to_numpy_dtype(pandera_dtype).type532 )533 def matches(x):534 return re.match(pattern, x)535 return strategy.filter(matches)536def str_contains_strategy(537 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],538 strategy: Optional[SearchStrategy] = None,539 *,540 pattern: str,541) -> SearchStrategy:542 """Strategy to generate strings that contain a particular pattern.543 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.544 :param strategy: an optional hypothesis strategy. If specified, the545 pandas dtype strategy will be chained onto this strategy.546 :param pattern: regex pattern.547 :returns: ``hypothesis`` strategy548 """549 if strategy is None:550 return st.from_regex(pattern, fullmatch=False).map(551 to_numpy_dtype(pandera_dtype).type552 )553 def contains(x):554 return re.search(pattern, x)555 return strategy.filter(contains)556def str_startswith_strategy(557 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],558 strategy: Optional[SearchStrategy] = None,559 *,560 string: str,561) -> SearchStrategy:562 """Strategy to generate strings that start with a specific string pattern.563 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.564 :param strategy: an optional hypothesis strategy. 
If specified, the565 pandas dtype strategy will be chained onto this strategy.566 :param string: string pattern.567 :returns: ``hypothesis`` strategy568 """569 if strategy is None:570 return st.from_regex(f"\\A{string}", fullmatch=False).map(571 to_numpy_dtype(pandera_dtype).type572 )573 return strategy.filter(lambda x: x.startswith(string))574def str_endswith_strategy(575 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],576 strategy: Optional[SearchStrategy] = None,577 *,578 string: str,579) -> SearchStrategy:580 """Strategy to generate strings that end with a specific string pattern.581 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.582 :param strategy: an optional hypothesis strategy. If specified, the583 pandas dtype strategy will be chained onto this strategy.584 :param string: string pattern.585 :returns: ``hypothesis`` strategy586 """587 if strategy is None:588 return st.from_regex(f"{string}\\Z", fullmatch=False).map(589 to_numpy_dtype(pandera_dtype).type590 )591 return strategy.filter(lambda x: x.endswith(string))592def str_length_strategy(593 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],594 strategy: Optional[SearchStrategy] = None,595 *,596 min_value: int,597 max_value: int,598) -> SearchStrategy:599 """Strategy to generate strings of a particular length600 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.601 :param strategy: an optional hypothesis strategy. 
If specified, the602 pandas dtype strategy will be chained onto this strategy.603 :param min_value: minimum string length.604 :param max_value: maximum string length.605 :returns: ``hypothesis`` strategy606 """607 if strategy is None:608 return st.text(min_size=min_value, max_size=max_value).map(609 to_numpy_dtype(pandera_dtype).type610 )611 return strategy.filter(lambda x: min_value <= len(x) <= max_value)612def _timestamp_to_datetime64_strategy(613 strategy: SearchStrategy,614) -> SearchStrategy:615 """Convert timestamp to numpy.datetime64616 Hypothesis only supports pure numpy dtypes but numpy.datetime64() truncates617 nanoseconds if given a pandas.Timestamp. We need to pass the unix epoch via618 the pandas.Timestamp.value attribute.619 """620 return st.builds(lambda x: np.datetime64(x.value, "ns"), strategy)621def field_element_strategy(622 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],623 strategy: Optional[SearchStrategy] = None,624 *,625 checks: Optional[Sequence] = None,626) -> SearchStrategy:627 """Strategy to generate elements of a column or index.628 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.629 :param strategy: an optional hypothesis strategy. If specified, the630 pandas dtype strategy will be chained onto this strategy.631 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain632 the values of the data in the column/index.633 :returns: ``hypothesis`` strategy634 """635 if strategy:636 raise BaseStrategyOnlyError(637 "The series strategy is a base strategy. You cannot specify the "638 "strategy argument to chain it to a parent strategy."639 )640 checks = [] if checks is None else checks641 elements = None642 def undefined_check_strategy(elements, check):643 """Strategy for checks with undefined strategies."""644 warnings.warn(645 "Element-wise check doesn't have a defined strategy."646 "Falling back to filtering drawn values based on the check "647 "definition. 
This can considerably slow down data-generation."648 )649 return (650 pandas_dtype_strategy(pandera_dtype)651 if elements is None652 else elements653 ).filter(check._check_fn)654 for check in checks:655 if hasattr(check, "strategy"):656 elements = check.strategy(pandera_dtype, elements)657 elif check.element_wise:658 elements = undefined_check_strategy(elements, check)659 # NOTE: vectorized checks with undefined strategies should be handled660 # by the series/dataframe strategy.661 if elements is None:662 elements = pandas_dtype_strategy(pandera_dtype)663 # Hypothesis only supports pure numpy datetime64 (i.e. timezone naive).664 # We cast to datetime64 after applying the check strategy so that checks665 # can see timezone-aware values.666 if _is_datetime_tz(pandera_dtype):667 elements = _timestamp_to_datetime64_strategy(elements)668 return elements669def series_strategy(670 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],671 strategy: Optional[SearchStrategy] = None,672 *,673 checks: Optional[Sequence] = None,674 nullable: bool = False,675 unique: bool = False,676 name: Optional[str] = None,677 size: Optional[int] = None,678):679 """Strategy to generate a pandas Series.680 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.681 :param strategy: an optional hypothesis strategy. 
If specified, the682 pandas dtype strategy will be chained onto this strategy.683 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain684 the values of the data in the column/index.685 :param nullable: whether or not generated Series contains null values.686 :param unique: whether or not generated Series contains unique values.687 :param name: name of the Series.688 :param size: number of elements in the Series.689 :returns: ``hypothesis`` strategy.690 """691 elements = field_element_strategy(pandera_dtype, strategy, checks=checks)692 strategy = (693 pdst.series(694 elements=elements,695 dtype=to_numpy_dtype(pandera_dtype),696 index=pdst.range_indexes(697 min_size=0 if size is None else size, max_size=size698 ),699 unique=unique,700 )701 .filter(lambda x: x.shape[0] > 0)702 .map(lambda x: x.rename(name))703 .map(lambda x: x.astype(pandera_dtype.type))704 )705 if nullable:706 strategy = null_field_masks(strategy)707 def undefined_check_strategy(strategy, check):708 """Strategy for checks with undefined strategies."""709 warnings.warn(710 "Vectorized check doesn't have a defined strategy."711 "Falling back to filtering drawn values based on the check "712 "definition. 
This can considerably slow down data-generation."713 )714 def _check_fn(series):715 return check(series).check_passed716 return strategy.filter(_check_fn)717 for check in checks if checks is not None else []:718 if not hasattr(check, "strategy") and not check.element_wise:719 strategy = undefined_check_strategy(strategy, check)720 return strategy721def column_strategy(722 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],723 strategy: Optional[SearchStrategy] = None,724 *,725 checks: Optional[Sequence] = None,726 unique: bool = False,727 name: Optional[str] = None,728):729 # pylint: disable=line-too-long730 """Create a data object describing a column in a DataFrame.731 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.732 :param strategy: an optional hypothesis strategy. If specified, the733 pandas dtype strategy will be chained onto this strategy.734 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain735 the values of the data in the column/index.736 :param unique: whether or not generated Series contains unique values.737 :param name: name of the Series.738 :returns: a `column <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.pandas.column>`_ object.739 """740 verify_dtype(pandera_dtype, schema_type="column", name=name)741 elements = field_element_strategy(pandera_dtype, strategy, checks=checks)742 return pdst.column(743 name=name,744 elements=elements,745 dtype=to_numpy_dtype(pandera_dtype),746 unique=unique,747 )748def index_strategy(749 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],750 strategy: Optional[SearchStrategy] = None,751 *,752 checks: Optional[Sequence] = None,753 nullable: bool = False,754 unique: bool = False,755 name: Optional[str] = None,756 size: Optional[int] = None,757):758 """Strategy to generate a pandas Index.759 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.760 :param strategy: an optional hypothesis strategy. 
If specified, the761 pandas dtype strategy will be chained onto this strategy.762 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain763 the values of the data in the column/index.764 :param nullable: whether or not generated Series contains null values.765 :param unique: whether or not generated Series contains unique values.766 :param name: name of the Series.767 :param size: number of elements in the Series.768 :returns: ``hypothesis`` strategy.769 """770 verify_dtype(pandera_dtype, schema_type="index", name=name)771 elements = field_element_strategy(pandera_dtype, strategy, checks=checks)772 strategy = pdst.indexes(773 elements=elements,774 dtype=to_numpy_dtype(pandera_dtype),775 min_size=0 if size is None else size,776 max_size=size,777 unique=unique,778 ).map(lambda x: x.astype(pandera_dtype.type))779 # this is a hack to convert np.str_ data values into native python str.780 col_dtype = str(pandera_dtype)781 if col_dtype in {"object", "str"} or col_dtype.startswith("string"):782 # pylint: disable=cell-var-from-loop,undefined-loop-variable783 strategy = strategy.map(lambda index: index.map(str))784 if name is not None:785 strategy = strategy.map(lambda index: index.rename(name))786 if nullable:787 strategy = null_field_masks(strategy)788 return strategy789def dataframe_strategy(790 pandera_dtype: Optional[DataType] = None,791 strategy: Optional[SearchStrategy] = None,792 *,793 columns: Optional[Dict] = None,794 checks: Optional[Sequence] = None,795 unique: Optional[List[str]] = None,796 index: Optional[IndexComponent] = None,797 size: Optional[int] = None,798 n_regex_columns: int = 1,799):800 """Strategy to generate a pandas DataFrame.801 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.802 :param strategy: if specified, this will raise a BaseStrategyOnlyError,803 since it cannot be chained to a prior strategy.804 :param columns: a dictionary where keys are column names and values805 are 
:class:`~pandera.schema_components.Column` objects.806 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain807 the values of the data at the dataframe level.808 :param unique: a list of column names that should be jointly unique.809 :param index: Index or MultiIndex schema component.810 :param size: number of elements in the Series.811 :param n_regex_columns: number of regex columns to generate.812 :returns: ``hypothesis`` strategy.813 """814 # pylint: disable=too-many-locals,too-many-branches,too-many-statements815 if n_regex_columns < 1:816 raise ValueError(817 "`n_regex_columns` must be a positive integer, found: "818 f"{n_regex_columns}"819 )820 if strategy:821 raise BaseStrategyOnlyError(822 "The dataframe strategy is a base strategy. You cannot specify "823 "the strategy argument to chain it to a parent strategy."824 )825 columns = {} if columns is None else columns826 checks = [] if checks is None else checks827 def undefined_check_strategy(strategy, check, column=None):828 """Strategy for checks with undefined strategies."""829 def _element_wise_check_fn(element):830 return check._check_fn(element)831 def _column_check_fn(dataframe):832 return check(dataframe[column]).check_passed833 def _dataframe_check_fn(dataframe):834 return check(dataframe).check_passed835 if check.element_wise:836 check_fn = _element_wise_check_fn837 warning_type = "Element-wise"838 elif column is None:839 check_fn = _dataframe_check_fn840 warning_type = "Dataframe"841 else:842 check_fn = _column_check_fn843 warning_type = "Column"844 warnings.warn(845 f"{warning_type} check doesn't have a defined strategy. "846 "Falling back to filtering drawn values based on the check "847 "definition. 
This can considerably slow down data-generation."848 )849 return strategy.filter(check_fn)850 def make_row_strategy(col, checks):851 strategy = None852 for check in checks:853 if hasattr(check, "strategy"):854 strategy = check.strategy(col.dtype, strategy)855 else:856 strategy = undefined_check_strategy(857 strategy=(858 pandas_dtype_strategy(col.dtype)859 if strategy is None860 else strategy861 ),862 check=check,863 )864 if strategy is None:865 strategy = pandas_dtype_strategy(col.dtype)866 return strategy867 @composite868 def _dataframe_strategy(draw):869 row_strategy_checks = []870 undefined_strat_df_checks = []871 for check in checks:872 if hasattr(check, "strategy") or check.element_wise:873 # we can apply element-wise checks defined at the dataframe874 # level to the row strategy875 row_strategy_checks.append(check)876 else:877 undefined_strat_df_checks.append(check)878 # expand column set to generate column names for columns where879 # regex=True.880 expanded_columns = {}881 for col_name, column in columns.items():882 if unique and col_name in unique:883 # if the column is in the set of columns specified in `unique`,884 # make the column strategy independently unique. 
This is885 # technically stricter than it should be, since the list of886 # columns in `unique` are required to be jointly unique, but887 # this is a simple solution that produces synthetic data that888 # fulfills the uniqueness constraints of the dataframe.889 column = deepcopy(column)890 column.unique = True891 if not column.regex:892 expanded_columns[col_name] = column893 else:894 regex_columns = draw(895 st.lists(896 st.from_regex(column.name, fullmatch=True),897 min_size=n_regex_columns,898 max_size=n_regex_columns,899 unique=True,900 )901 )902 for regex_col in regex_columns:903 expanded_columns[regex_col] = deepcopy(column).set_name(904 regex_col905 )906 # collect all non-element-wise column checks with undefined strategies907 undefined_strat_column_checks: Dict[str, list] = defaultdict(list)908 for col_name, column in expanded_columns.items():909 undefined_strat_column_checks[col_name].extend(910 check911 for check in column.checks912 if not hasattr(check, "strategy") and not check.element_wise913 )914 # override the column datatype with dataframe-level datatype if915 # specified916 col_dtypes = {917 col_name: str(col.dtype)918 if pandera_dtype is None919 else str(pandera_dtype)920 for col_name, col in expanded_columns.items()921 }922 nullable_columns = {923 col_name: col.nullable924 for col_name, col in expanded_columns.items()925 }926 row_strategy = None927 if row_strategy_checks:928 row_strategy = st.fixed_dictionaries(929 {930 col_name: make_row_strategy(col, row_strategy_checks)931 for col_name, col in expanded_columns.items()932 }933 )934 strategy = pdst.data_frames(935 columns=[936 column.strategy_component()937 for column in expanded_columns.values()938 ],939 rows=row_strategy,940 index=pdst.range_indexes(941 min_size=0 if size is None else size, max_size=size942 ),943 )944 # this is a hack to convert np.str_ data values into native python str.945 for col_name, col_dtype in col_dtypes.items():946 if col_dtype in {"object", "str"} or 
col_dtype.startswith(947 "string"948 ):949 # pylint: disable=cell-var-from-loop,undefined-loop-variable950 strategy = strategy.map(951 lambda df: df.assign(**{col_name: df[col_name].map(str)})952 )953 strategy = strategy.map(954 lambda df: df if df.empty else df.astype(col_dtypes)955 )956 if size is not None and size > 0 and any(nullable_columns.values()):957 strategy = null_dataframe_masks(strategy, nullable_columns)958 if index is not None:959 strategy = set_pandas_index(strategy, index)960 for check in undefined_strat_df_checks:961 strategy = undefined_check_strategy(strategy, check)962 for col_name, column_checks in undefined_strat_column_checks.items():963 for check in column_checks: # type: ignore964 strategy = undefined_check_strategy(965 strategy, check, column=col_name966 )967 return draw(strategy)968 return _dataframe_strategy()969# pylint: disable=unused-argument970def multiindex_strategy(971 pandera_dtype: Optional[DataType] = None,972 strategy: Optional[SearchStrategy] = None,973 *,974 indexes: Optional[List] = None,975 size: Optional[int] = None,976):977 """Strategy to generate a pandas MultiIndex object.978 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.979 :param strategy: an optional hypothesis strategy. If specified, the980 pandas dtype strategy will be chained onto this strategy.981 :param indexes: a list of :class:`~pandera.schema_components.Index`982 objects.983 :param size: number of elements in the Series.984 :returns: ``hypothesis`` strategy.985 """986 # pylint: disable=unnecessary-lambda987 if strategy:988 raise BaseStrategyOnlyError(989 "The dataframe strategy is a base strategy. 
You cannot specify "990 "the strategy argument to chain it to a parent strategy."991 )992 indexes = [] if indexes is None else indexes993 index_dtypes = {994 index.name if index.name is not None else i: str(index.dtype)995 for i, index in enumerate(indexes)996 }997 nullable_index = {998 index.name if index.name is not None else i: index.nullable999 for i, index in enumerate(indexes)1000 }1001 strategy = pdst.data_frames(1002 [index.strategy_component() for index in indexes],1003 index=pdst.range_indexes(1004 min_size=0 if size is None else size, max_size=size1005 ),1006 ).map(lambda x: x.astype(index_dtypes))1007 # this is a hack to convert np.str_ data values into native python str.1008 for name, dtype in index_dtypes.items():1009 if dtype in {"object", "str"} or dtype.startswith("string"):1010 # pylint: disable=cell-var-from-loop,undefined-loop-variable1011 strategy = strategy.map(1012 lambda df: df.assign(**{name: df[name].map(str)})1013 )1014 if any(nullable_index.values()):1015 strategy = null_dataframe_masks(strategy, nullable_index)...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!

Get 100 automation test minutes FREE!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful