How to use undefined_check_strategy method in pandera

Best Python code snippet using pandera_python

strategies.py

Source:strategies.py Github

copy

Full Screen

...638 "strategy argument to chain it to a parent strategy."639 )640 checks = [] if checks is None else checks641 elements = None642 def undefined_check_strategy(elements, check):643 """Strategy for checks with undefined strategies."""644 warnings.warn(645 "Element-wise check doesn't have a defined strategy."646 "Falling back to filtering drawn values based on the check "647 "definition. This can considerably slow down data-generation."648 )649 return (650 pandas_dtype_strategy(pandera_dtype)651 if elements is None652 else elements653 ).filter(check._check_fn)654 for check in checks:655 if hasattr(check, "strategy"):656 elements = check.strategy(pandera_dtype, elements)657 elif check.element_wise:658 elements = undefined_check_strategy(elements, check)659 # NOTE: vectorized checks with undefined strategies should be handled660 # by the series/dataframe strategy.661 if elements is None:662 elements = pandas_dtype_strategy(pandera_dtype)663 # Hypothesis only supports pure numpy datetime64 (i.e. timezone naive).664 # We cast to datetime64 after applying the check strategy so that checks665 # can see timezone-aware values.666 if _is_datetime_tz(pandera_dtype):667 elements = _timestamp_to_datetime64_strategy(elements)668 return elements669def series_strategy(670 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],671 strategy: Optional[SearchStrategy] = None,672 *,673 checks: Optional[Sequence] = None,674 nullable: bool = False,675 unique: bool = False,676 name: Optional[str] = None,677 size: Optional[int] = None,678):679 """Strategy to generate a pandas Series.680 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.681 :param strategy: an optional hypothesis strategy. 
If specified, the682 pandas dtype strategy will be chained onto this strategy.683 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain684 the values of the data in the column/index.685 :param nullable: whether or not generated Series contains null values.686 :param unique: whether or not generated Series contains unique values.687 :param name: name of the Series.688 :param size: number of elements in the Series.689 :returns: ``hypothesis`` strategy.690 """691 elements = field_element_strategy(pandera_dtype, strategy, checks=checks)692 strategy = (693 pdst.series(694 elements=elements,695 dtype=to_numpy_dtype(pandera_dtype),696 index=pdst.range_indexes(697 min_size=0 if size is None else size, max_size=size698 ),699 unique=unique,700 )701 .filter(lambda x: x.shape[0] > 0)702 .map(lambda x: x.rename(name))703 .map(lambda x: x.astype(pandera_dtype.type))704 )705 if nullable:706 strategy = null_field_masks(strategy)707 def undefined_check_strategy(strategy, check):708 """Strategy for checks with undefined strategies."""709 warnings.warn(710 "Vectorized check doesn't have a defined strategy."711 "Falling back to filtering drawn values based on the check "712 "definition. 
This can considerably slow down data-generation."713 )714 def _check_fn(series):715 return check(series).check_passed716 return strategy.filter(_check_fn)717 for check in checks if checks is not None else []:718 if not hasattr(check, "strategy") and not check.element_wise:719 strategy = undefined_check_strategy(strategy, check)720 return strategy721def column_strategy(722 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],723 strategy: Optional[SearchStrategy] = None,724 *,725 checks: Optional[Sequence] = None,726 unique: bool = False,727 name: Optional[str] = None,728):729 # pylint: disable=line-too-long730 """Create a data object describing a column in a DataFrame.731 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.732 :param strategy: an optional hypothesis strategy. If specified, the733 pandas dtype strategy will be chained onto this strategy.734 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain735 the values of the data in the column/index.736 :param unique: whether or not generated Series contains unique values.737 :param name: name of the Series.738 :returns: a `column <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.pandas.column>`_ object.739 """740 verify_dtype(pandera_dtype, schema_type="column", name=name)741 elements = field_element_strategy(pandera_dtype, strategy, checks=checks)742 return pdst.column(743 name=name,744 elements=elements,745 dtype=to_numpy_dtype(pandera_dtype),746 unique=unique,747 )748def index_strategy(749 pandera_dtype: Union[numpy_engine.DataType, pandas_engine.DataType],750 strategy: Optional[SearchStrategy] = None,751 *,752 checks: Optional[Sequence] = None,753 nullable: bool = False,754 unique: bool = False,755 name: Optional[str] = None,756 size: Optional[int] = None,757):758 """Strategy to generate a pandas Index.759 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.760 :param strategy: an optional hypothesis strategy. 
If specified, the761 pandas dtype strategy will be chained onto this strategy.762 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain763 the values of the data in the column/index.764 :param nullable: whether or not generated Series contains null values.765 :param unique: whether or not generated Series contains unique values.766 :param name: name of the Series.767 :param size: number of elements in the Series.768 :returns: ``hypothesis`` strategy.769 """770 verify_dtype(pandera_dtype, schema_type="index", name=name)771 elements = field_element_strategy(pandera_dtype, strategy, checks=checks)772 strategy = pdst.indexes(773 elements=elements,774 dtype=to_numpy_dtype(pandera_dtype),775 min_size=0 if size is None else size,776 max_size=size,777 unique=unique,778 ).map(lambda x: x.astype(pandera_dtype.type))779 # this is a hack to convert np.str_ data values into native python str.780 col_dtype = str(pandera_dtype)781 if col_dtype in {"object", "str"} or col_dtype.startswith("string"):782 # pylint: disable=cell-var-from-loop,undefined-loop-variable783 strategy = strategy.map(lambda index: index.map(str))784 if name is not None:785 strategy = strategy.map(lambda index: index.rename(name))786 if nullable:787 strategy = null_field_masks(strategy)788 return strategy789def dataframe_strategy(790 pandera_dtype: Optional[DataType] = None,791 strategy: Optional[SearchStrategy] = None,792 *,793 columns: Optional[Dict] = None,794 checks: Optional[Sequence] = None,795 unique: Optional[List[str]] = None,796 index: Optional[IndexComponent] = None,797 size: Optional[int] = None,798 n_regex_columns: int = 1,799):800 """Strategy to generate a pandas DataFrame.801 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance.802 :param strategy: if specified, this will raise a BaseStrategyOnlyError,803 since it cannot be chained to a prior strategy.804 :param columns: a dictionary where keys are column names and values805 are 
:class:`~pandera.schema_components.Column` objects.806 :param checks: sequence of :class:`~pandera.checks.Check` s to constrain807 the values of the data at the dataframe level.808 :param unique: a list of column names that should be jointly unique.809 :param index: Index or MultiIndex schema component.810 :param size: number of elements in the Series.811 :param n_regex_columns: number of regex columns to generate.812 :returns: ``hypothesis`` strategy.813 """814 # pylint: disable=too-many-locals,too-many-branches,too-many-statements815 if n_regex_columns < 1:816 raise ValueError(817 "`n_regex_columns` must be a positive integer, found: "818 f"{n_regex_columns}"819 )820 if strategy:821 raise BaseStrategyOnlyError(822 "The dataframe strategy is a base strategy. You cannot specify "823 "the strategy argument to chain it to a parent strategy."824 )825 columns = {} if columns is None else columns826 checks = [] if checks is None else checks827 def undefined_check_strategy(strategy, check, column=None):828 """Strategy for checks with undefined strategies."""829 def _element_wise_check_fn(element):830 return check._check_fn(element)831 def _column_check_fn(dataframe):832 return check(dataframe[column]).check_passed833 def _dataframe_check_fn(dataframe):834 return check(dataframe).check_passed835 if check.element_wise:836 check_fn = _element_wise_check_fn837 warning_type = "Element-wise"838 elif column is None:839 check_fn = _dataframe_check_fn840 warning_type = "Dataframe"841 else:842 check_fn = _column_check_fn843 warning_type = "Column"844 warnings.warn(845 f"{warning_type} check doesn't have a defined strategy. "846 "Falling back to filtering drawn values based on the check "847 "definition. 
This can considerably slow down data-generation."848 )849 return strategy.filter(check_fn)850 def make_row_strategy(col, checks):851 strategy = None852 for check in checks:853 if hasattr(check, "strategy"):854 strategy = check.strategy(col.dtype, strategy)855 else:856 strategy = undefined_check_strategy(857 strategy=(858 pandas_dtype_strategy(col.dtype)859 if strategy is None860 else strategy861 ),862 check=check,863 )864 if strategy is None:865 strategy = pandas_dtype_strategy(col.dtype)866 return strategy867 @composite868 def _dataframe_strategy(draw):869 row_strategy_checks = []870 undefined_strat_df_checks = []871 for check in checks:872 if hasattr(check, "strategy") or check.element_wise:873 # we can apply element-wise checks defined at the dataframe874 # level to the row strategy875 row_strategy_checks.append(check)876 else:877 undefined_strat_df_checks.append(check)878 # expand column set to generate column names for columns where879 # regex=True.880 expanded_columns = {}881 for col_name, column in columns.items():882 if unique and col_name in unique:883 # if the column is in the set of columns specified in `unique`,884 # make the column strategy independently unique. 
This is885 # technically stricter than it should be, since the list of886 # columns in `unique` are required to be jointly unique, but887 # this is a simple solution that produces synthetic data that888 # fulfills the uniqueness constraints of the dataframe.889 column = deepcopy(column)890 column.unique = True891 if not column.regex:892 expanded_columns[col_name] = column893 else:894 regex_columns = draw(895 st.lists(896 st.from_regex(column.name, fullmatch=True),897 min_size=n_regex_columns,898 max_size=n_regex_columns,899 unique=True,900 )901 )902 for regex_col in regex_columns:903 expanded_columns[regex_col] = deepcopy(column).set_name(904 regex_col905 )906 # collect all non-element-wise column checks with undefined strategies907 undefined_strat_column_checks: Dict[str, list] = defaultdict(list)908 for col_name, column in expanded_columns.items():909 undefined_strat_column_checks[col_name].extend(910 check911 for check in column.checks912 if not hasattr(check, "strategy") and not check.element_wise913 )914 # override the column datatype with dataframe-level datatype if915 # specified916 col_dtypes = {917 col_name: str(col.dtype)918 if pandera_dtype is None919 else str(pandera_dtype)920 for col_name, col in expanded_columns.items()921 }922 nullable_columns = {923 col_name: col.nullable924 for col_name, col in expanded_columns.items()925 }926 row_strategy = None927 if row_strategy_checks:928 row_strategy = st.fixed_dictionaries(929 {930 col_name: make_row_strategy(col, row_strategy_checks)931 for col_name, col in expanded_columns.items()932 }933 )934 strategy = pdst.data_frames(935 columns=[936 column.strategy_component()937 for column in expanded_columns.values()938 ],939 rows=row_strategy,940 index=pdst.range_indexes(941 min_size=0 if size is None else size, max_size=size942 ),943 )944 # this is a hack to convert np.str_ data values into native python str.945 for col_name, col_dtype in col_dtypes.items():946 if col_dtype in {"object", "str"} or 
col_dtype.startswith(947 "string"948 ):949 # pylint: disable=cell-var-from-loop,undefined-loop-variable950 strategy = strategy.map(951 lambda df: df.assign(**{col_name: df[col_name].map(str)})952 )953 strategy = strategy.map(954 lambda df: df if df.empty else df.astype(col_dtypes)955 )956 if size is not None and size > 0 and any(nullable_columns.values()):957 strategy = null_dataframe_masks(strategy, nullable_columns)958 if index is not None:959 strategy = set_pandas_index(strategy, index)960 for check in undefined_strat_df_checks:961 strategy = undefined_check_strategy(strategy, check)962 for col_name, column_checks in undefined_strat_column_checks.items():963 for check in column_checks: # type: ignore964 strategy = undefined_check_strategy(965 strategy, check, column=col_name966 )967 return draw(strategy)968 return _dataframe_strategy()969# pylint: disable=unused-argument970def multiindex_strategy(971 pandera_dtype: Optional[DataType] = None,972 strategy: Optional[SearchStrategy] = None,973 *,974 indexes: Optional[List] = None,975 size: Optional[int] = None,976):977 """Strategy to generate a pandas MultiIndex object.978 :param pandera_dtype: :class:`pandera.dtypes.DataType` instance....

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful