How to use the _pandas_obj_to_validate method in pandera

_pandas_obj_to_validate is a module-level helper in pandera's schemas.py. Both DataFrameSchema.validate and SeriesSchemaBase.validate call it to subset the data before running any checks: it collects the first `head` rows, the last `tail` rows, and a random `sample` of rows (seeded by `random_state`), and returns the object unchanged when none of these arguments are given. The excerpt below shows the helper together with its call sites.


Source: schemas.py (from the pandera project on GitHub)
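Before walking through the full listing, it helps to see what the helper computes. Its body at the bottom of the excerpt is truncated, but the docstrings ("Rows overlapping with `tail` or `sample` are de-duplicated") make the intended behavior clear. Here is a minimal standalone sketch of that logic in plain pandas — the function name `subsample` is ours, not pandera's, and the de-duplication step is an assumption based on the documented behavior rather than on the truncated body:

```python
from typing import Optional, Union

import pandas as pd


def subsample(
    obj: Union[pd.DataFrame, pd.Series],
    head: Optional[int] = None,
    tail: Optional[int] = None,
    sample: Optional[int] = None,
    random_state: Optional[int] = None,
) -> Union[pd.DataFrame, pd.Series]:
    """Sketch of what _pandas_obj_to_validate computes (hypothetical name)."""
    parts = []
    if head is not None:
        parts.append(obj.head(head))  # first n rows
    if tail is not None:
        parts.append(obj.tail(tail))  # last n rows
    if sample is not None:
        parts.append(obj.sample(sample, random_state=random_state))
    if not parts:
        # no subsetting requested: validate the whole object
        return obj
    combined = pd.concat(parts)
    # drop rows selected by more than one of head/tail/sample
    # (assumed from the "de-duplicated" wording in the docstrings)
    return combined[~combined.index.duplicated()]
```

The full excerpt from pandera's schemas.py follows.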

```python
# ... (excerpt starts partway through DataFrameSchema.validate) ...
                    col.dtype = self.dtype
                schema_components.append(col)

        if self.index is not None:
            schema_components.append(self.index)

        df_to_validate = _pandas_obj_to_validate(
            check_obj, head, tail, sample, random_state
        )

        check_results = []
        # schema-component-level checks
        for schema_component in schema_components:
            try:
                result = schema_component(
                    df_to_validate,
                    lazy=lazy,
                    # don't make a copy of the data
                    inplace=True,
                )
                check_results.append(check_utils.is_table(result))
            except errors.SchemaError as err:
                error_handler.collect_error("schema_component_check", err)
            except errors.SchemaErrors as err:
                for schema_error_dict in err.schema_errors:
                    error_handler.collect_error(
                        "schema_component_check", schema_error_dict["error"]
                    )

        # dataframe-level checks
        for check_index, check in enumerate(self.checks):
            try:
                check_results.append(
                    _handle_check_results(
                        self, check_index, check, df_to_validate
                    )
                )
            except errors.SchemaError as err:
                error_handler.collect_error("dataframe_check", err)

        if self.unique:
            # NOTE: fix this pylint error
            # pylint: disable=not-an-iterable
            temp_unique: List[List] = (
                [self.unique]
                if all(isinstance(x, str) for x in self.unique)
                else self.unique
            )
            for lst in temp_unique:
                duplicates = df_to_validate.duplicated(subset=lst, keep=False)
                if duplicates.any():
                    # NOTE: this is a hack to support koalas, need to figure
                    # out a workaround to error: "Cannot combine the series or
                    # dataframe because it comes from a different dataframe."
                    if type(duplicates).__module__.startswith(
                        "databricks.koalas"
                    ):
                        # pylint: disable=import-outside-toplevel
                        import databricks.koalas as ks

                        with ks.option_context(
                            "compute.ops_on_diff_frames", True
                        ):
                            failure_cases = df_to_validate.loc[duplicates, lst]
                    else:
                        failure_cases = df_to_validate.loc[duplicates, lst]
                    failure_cases = reshape_failure_cases(failure_cases)
                    error_handler.collect_error(
                        "duplicates",
                        errors.SchemaError(
                            self,
                            check_obj,
                            f"columns '{*lst,}' not unique:\n{failure_cases}",
                            failure_cases=failure_cases,
                            check="multiple_fields_uniqueness",
                        ),
                    )

        if lazy and error_handler.collected_errors:
            raise errors.SchemaErrors(
                error_handler.collected_errors, check_obj
            )

        assert all(check_results), "all check results must be True."
        return check_obj

    def __call__(
        self,
        dataframe: pd.DataFrame,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ):
        """Alias for :func:`DataFrameSchema.validate` method.

        :param pd.DataFrame dataframe: the dataframe to be validated.
        :param head: validate the first n rows. Rows overlapping with `tail` or
            `sample` are de-duplicated.
        :type head: int
        :param tail: validate the last n rows. Rows overlapping with `head` or
            `sample` are de-duplicated.
        :type tail: int
        :param sample: validate a random sample of n rows. Rows overlapping
            with `head` or `tail` are de-duplicated.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates dataframe against all validation
            checks and raises a ``SchemaErrors``. Otherwise, raise
            ``SchemaError`` as soon as one occurs.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        """
        return self.validate(
            dataframe, head, tail, sample, random_state, lazy, inplace
        )

    def __repr__(self) -> str:
        """Represent string for logging."""
        return (
            f"<Schema {self.__class__.__name__}("
            f"columns={self.columns}, "
            f"checks={self.checks}, "
            f"index={self.index.__repr__()}, "
            f"coerce={self.coerce}, "
            f"dtype={self._dtype},"
            f"strict={self.strict},"
            f"name={self.name},"
            f"ordered={self.ordered}"
            ")>"
        )

    def __str__(self) -> str:
        """Represent string for user inspection."""

        def _format_multiline(json_str, arg):
            return "\n".join(
                f"{indent}{line}" if i != 0 else f"{indent}{arg}={line}"
                for i, line in enumerate(json_str.split("\n"))
            )

        indent = " " * N_INDENT_SPACES
        if self.columns:
            columns_str = f"{indent}columns={{\n"
            for colname, col in self.columns.items():
                columns_str += f"{indent * 2}'{colname}': {col}\n"
            columns_str += f"{indent}}}"
        else:
            columns_str = f"{indent}columns={{}}"

        if self.checks:
            checks_str = f"{indent}checks=[\n"
            for check in self.checks:
                checks_str += f"{indent * 2}{check}\n"
            checks_str += f"{indent}]"
        else:
            checks_str = f"{indent}checks=[]"

        # add additional indents
        index_ = str(self.index).split("\n")
        if len(index_) == 1:
            index = str(self.index)
        else:
            index = "\n".join(
                x if i == 0 else f"{indent}{x}" for i, x in enumerate(index_)
            )

        return (
            f"<Schema {self.__class__.__name__}(\n"
            f"{columns_str},\n"
            f"{checks_str},\n"
            f"{indent}coerce={self.coerce},\n"
            f"{indent}dtype={self._dtype},\n"
            f"{indent}index={index},\n"
            f"{indent}strict={self.strict}\n"
            f"{indent}name={self.name},\n"
            f"{indent}ordered={self.ordered}\n"
            ")>"
        )

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, type(self)):
            return NotImplemented

        def _compare_dict(obj):
            return {
                k: v for k, v in obj.__dict__.items() if k != "_IS_INFERRED"
            }

        return _compare_dict(self) == _compare_dict(other)

    @st.strategy_import_error
    def strategy(
        self, *, size: Optional[int] = None, n_regex_columns: int = 1
    ):
        """Create a ``hypothesis`` strategy for generating a DataFrame.

        :param size: number of elements to generate
        :param n_regex_columns: number of regex columns to generate.
        :returns: a strategy that generates pandas DataFrame objects.
        """
        return st.dataframe_strategy(
            self.dtype,
            columns=self.columns,
            checks=self.checks,
            unique=self.unique,
            index=self.index,
            size=size,
            n_regex_columns=n_regex_columns,
        )

    def example(
        self, size: Optional[int] = None, n_regex_columns: int = 1
    ) -> pd.DataFrame:
        """Generate an example of a particular size.

        :param size: number of elements in the generated DataFrame.
        :returns: pandas DataFrame object.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import,import-error
        import hypothesis

        with warnings.catch_warnings():
            warnings.simplefilter(
                "ignore",
                category=hypothesis.errors.NonInteractiveExampleWarning,
            )
            return self.strategy(
                size=size, n_regex_columns=n_regex_columns
            ).example()

    @_inferred_schema_guard
    def add_columns(
        self, extra_schema_cols: Dict[str, Any]
    ) -> "DataFrameSchema":
        """Create a copy of the :class:`DataFrameSchema` with extra columns.

        :param extra_schema_cols: Additional columns of the format
        :type extra_schema_cols: DataFrameSchema
        :returns: a new :class:`DataFrameSchema` with the extra_schema_cols
            added.

        :example:

        To add columns to the schema, pass a dictionary with column name and
        ``Column`` instance key-value pairs.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema(
        ...     {
        ...         "category": pa.Column(str),
        ...         "probability": pa.Column(float),
        ...     }
        ... )
        >>> print(
        ...     example_schema.add_columns({"even_number": pa.Column(pa.Bool)})
        ... )
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(str))>
                'probability': <Schema Column(name=probability, type=DataType(float64))>
                'even_number': <Schema Column(name=even_number, type=DataType(bool))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`remove_columns`
        """
        schema_copy = copy.deepcopy(self)
        schema_copy.columns = {
            **schema_copy.columns,
            **DataFrameSchema(extra_schema_cols).columns,
        }
        return schema_copy

    @_inferred_schema_guard
    def remove_columns(self, cols_to_remove: List[str]) -> "DataFrameSchema":
        """Removes columns from a :class:`DataFrameSchema` and returns a new
        copy.

        :param cols_to_remove: Columns to be removed from the
            ``DataFrameSchema``
        :type cols_to_remove: List
        :returns: a new :class:`DataFrameSchema` without the cols_to_remove
        :raises: :class:`~pandera.errors.SchemaInitError`: if column not in
            schema.

        :example:

        To remove a column or set of columns from a schema, pass a list of
        columns to be removed:

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema(
        ...     {
        ...         "category" : pa.Column(str),
        ...         "probability": pa.Column(float)
        ...     }
        ... )
        >>>
        >>> print(example_schema.remove_columns(["category"]))
        <Schema DataFrameSchema(
            columns={
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`add_columns`
        """
        schema_copy = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in cols_to_remove if x not in schema_copy.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        for col in cols_to_remove:
            schema_copy.columns.pop(col)

        return schema_copy

    @_inferred_schema_guard
    def update_column(self, column_name: str, **kwargs) -> "DataFrameSchema":
        """Create copy of a :class:`DataFrameSchema` with updated column
        properties.

        :param column_name:
        :param kwargs: key-word arguments supplied to
            :class:`~pandera.schema_components.Column`
        :returns: a new :class:`DataFrameSchema` with updated column
        :raises: :class:`~pandera.errors.SchemaInitError`: if column not in
            schema or you try to change the name.

        :example:

        Calling ``schema.update_column`` returns the :class:`DataFrameSchema`
        with the updated column.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>> print(
        ...     example_schema.update_column(
        ...         'category', dtype=pa.Category
        ...     )
        ... )
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(category))>
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`rename_columns`
        """
        # check that columns exist in schema
        if "name" in kwargs:
            raise ValueError("cannot update 'name' of the column.")
        if column_name not in self.columns:
            raise ValueError(f"column '{column_name}' not in {self}")
        schema_copy = copy.deepcopy(self)
        column_copy = copy.deepcopy(self.columns[column_name])
        new_column = column_copy.__class__(
            **{**column_copy.properties, **kwargs}
        )
        schema_copy.columns.update({column_name: new_column})
        return schema_copy

    def update_columns(
        self, update_dict: Dict[str, Dict[str, Any]]
    ) -> "DataFrameSchema":
        """
        Create copy of a :class:`DataFrameSchema` with updated column
        properties.

        :param update_dict:
        :return: a new :class:`DataFrameSchema` with updated columns
        :raises: :class:`~pandera.errors.SchemaInitError`: if column not in
            schema or you try to change the name.

        :example:

        Calling ``schema.update_columns`` returns the :class:`DataFrameSchema`
        with the updated columns.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>>
        >>> print(
        ...     example_schema.update_columns(
        ...         {"category": {"dtype":pa.Category}}
        ...     )
        ... )
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(category))>
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. note:: This is the successor to the ``update_column`` method, which
            will be deprecated.
        """
        new_schema = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in update_dict.keys() if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        new_columns: Dict[str, Column] = {}
        for col in new_schema.columns:
            # check
            if update_dict.get(col):
                if update_dict[col].get("name"):
                    raise errors.SchemaInitError(
                        "cannot update 'name' \
                        property of the column."
                    )
            original_properties = new_schema.columns[col].properties
            if update_dict.get(col):
                new_properties = copy.deepcopy(original_properties)
                new_properties.update(update_dict[col])
                new_columns[col] = new_schema.columns[col].__class__(
                    **new_properties
                )
            else:
                new_columns[col] = new_schema.columns[col].__class__(
                    **original_properties
                )

        new_schema.columns = new_columns
        return new_schema

    def rename_columns(self, rename_dict: Dict[str, str]) -> "DataFrameSchema":
        """Rename columns using a dictionary of key-value pairs.

        :param rename_dict: dictionary of 'old_name': 'new_name' key-value
            pairs.
        :returns: :class:`DataFrameSchema` (copy of original)
        :raises: :class:`~pandera.errors.SchemaInitError` if column not in the
            schema.

        :example:

        To rename a column or set of columns, pass a dictionary of old column
        names and new column names, similar to the pandas DataFrame method.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>>
        >>> print(
        ...     example_schema.rename_columns({
        ...         "category": "categories",
        ...         "probability": "probabilities"
        ...     })
        ... )
        <Schema DataFrameSchema(
            columns={
                'categories': <Schema Column(name=categories, type=DataType(str))>
                'probabilities': <Schema Column(name=probabilities, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`update_column`
        """
        new_schema = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in rename_dict.keys() if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        # ensure all new keys are not present in the current column names
        already_in_columns: List[str] = [
            x for x in rename_dict.values() if x in new_schema.columns.keys()
        ]
        if already_in_columns:
            raise errors.SchemaInitError(
                f"Keys {already_in_columns} already found in schema columns!"
            )

        # We iterate over the existing columns dict and replace those keys
        # that exist in the rename_dict
        new_columns = {
            (rename_dict[col_name] if col_name in rename_dict else col_name): (
                col_attrs.set_name(rename_dict[col_name])
                if col_name in rename_dict
                else col_attrs
            )
            for col_name, col_attrs in new_schema.columns.items()
        }

        new_schema.columns = new_columns
        return new_schema

    def select_columns(self, columns: List[Any]) -> "DataFrameSchema":
        """Select subset of columns in the schema.

        *New in version 0.4.5*

        :param columns: list of column names to select.
        :returns: :class:`DataFrameSchema` (copy of original) with only
            the selected columns.
        :raises: :class:`~pandera.errors.SchemaInitError` if column not in the
            schema.

        :example:

        To subset a schema by column, and return a new schema:

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)
        ... })
        >>>
        >>> print(example_schema.select_columns(['category']))
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(str))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        .. note:: If an index is present in the schema, it will also be
            included in the new schema.
        """
        new_schema = copy.deepcopy(self)

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in columns if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        new_columns = {
            col_name: column
            for col_name, column in self.columns.items()
            if col_name in columns
        }
        new_schema.columns = new_columns
        return new_schema

    def to_script(self, fp: Union[str, Path] = None) -> "DataFrameSchema":
        """Write DataFrameSchema to python script.

        :param fp: str, Path to write script
        :returns: dataframe schema.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        import pandera.io

        return pandera.io.to_script(self, fp)

    @classmethod
    def from_yaml(cls, yaml_schema) -> "DataFrameSchema":
        """Create DataFrameSchema from yaml file.

        :param yaml_schema: str, Path to yaml schema, or serialized yaml
            string.
        :returns: dataframe schema.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        import pandera.io

        return pandera.io.from_yaml(yaml_schema)

    def to_yaml(self, stream: Optional[os.PathLike] = None):
        """Write DataFrameSchema to yaml file.

        :param stream: file stream to write to. If None, dumps to string.
        :returns: yaml string if stream is None, otherwise returns None.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        import pandera.io

        return pandera.io.to_yaml(self, stream=stream)

    def set_index(
        self, keys: List[str], drop: bool = True, append: bool = False
    ) -> "DataFrameSchema":
        """
        A method for setting the :class:`Index` of a :class:`DataFrameSchema`,
        via an existing :class:`Column` or list of columns.

        :param keys: list of labels
        :param drop: bool, default True
        :param append: bool, default False
        :return: a new :class:`DataFrameSchema` with specified column(s) in the
            index.
        :raises: :class:`~pandera.errors.SchemaInitError` if column not in the
            schema.

        :examples:

        Just as you would set the index in a ``pandas`` DataFrame from an
        existing column, you can set an index within the schema from an
        existing column in the schema.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str),
        ...     "probability": pa.Column(float)})
        >>>
        >>> print(example_schema.set_index(['category']))
        <Schema DataFrameSchema(
            columns={
                'probability': <Schema Column(name=probability, type=DataType(float64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=<Schema Index(name=category, type=DataType(str))>,
            strict=False
            name=None,
            ordered=False
        )>

        If you have an existing index in your schema, and you would like to
        append a new column as an index to it (yielding a :class:`MultiIndex`),
        just use set_index as you would in pandas.

        >>> example_schema = pa.DataFrameSchema(
        ...     {
        ...         "column1": pa.Column(str),
        ...         "column2": pa.Column(int)
        ...     },
        ...     index=pa.Index(name = "column3", dtype = int)
        ... )
        >>>
        >>> print(example_schema.set_index(["column2"], append = True))
        <Schema DataFrameSchema(
            columns={
                'column1': <Schema Column(name=column1, type=DataType(str))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=<Schema MultiIndex(
                indexes=[
                    <Schema Index(name=column3, type=DataType(int64))>
                    <Schema Index(name=column2, type=DataType(int64))>
                ]
                coerce=False,
                strict=False,
                name=None,
                ordered=True
            )>,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`reset_index`
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        from pandera.schema_components import Index, MultiIndex

        new_schema = copy.deepcopy(self)

        keys_temp: List = (
            list(set(keys)) if not isinstance(keys, list) else keys
        )

        # ensure all specified keys are present in the columns
        not_in_cols: List[str] = [
            x for x in keys_temp if x not in new_schema.columns.keys()
        ]
        if not_in_cols:
            raise errors.SchemaInitError(
                f"Keys {not_in_cols} not found in schema columns!"
            )

        # if there is already an index, append or replace according to
        # parameters
        ind_list: List = (
            []
            if new_schema.index is None or not append
            else list(new_schema.index.indexes)
            if isinstance(new_schema.index, MultiIndex) and append
            else [new_schema.index]
        )

        for col in keys_temp:
            ind_list.append(
                Index(
                    dtype=new_schema.columns[col].dtype,
                    name=col,
                    checks=new_schema.columns[col].checks,
                    nullable=new_schema.columns[col].nullable,
                    unique=new_schema.columns[col].unique,
                    coerce=new_schema.columns[col].coerce,
                )
            )

        new_schema.index = (
            ind_list[0] if len(ind_list) == 1 else MultiIndex(ind_list)
        )

        # if drop is True as defaulted, drop the columns moved into the index
        if drop:
            new_schema = new_schema.remove_columns(keys_temp)

        return new_schema

    def reset_index(
        self, level: List[str] = None, drop: bool = False
    ) -> "DataFrameSchema":
        """
        A method for resetting the :class:`Index` of a :class:`DataFrameSchema`

        :param level: list of labels
        :param drop: bool, default False
        :return: a new :class:`DataFrameSchema` with specified column(s) in the
            index.
        :raises: :class:`~pandera.errors.SchemaInitError` if no index set in
            schema.

        :examples:

        Similar to the ``pandas`` reset_index method on a pandas DataFrame,
        this method can be used to fully or partially reset indices of a
        schema.

        To remove the entire index from the schema, just call the reset_index
        method with default parameters.

        >>> import pandera as pa
        >>>
        >>> example_schema = pa.DataFrameSchema(
        ...     {"probability" : pa.Column(float)},
        ...     index = pa.Index(name="unique_id", dtype=int)
        ... )
        >>>
        >>> print(example_schema.reset_index())
        <Schema DataFrameSchema(
            columns={
                'probability': <Schema Column(name=probability, type=DataType(float64))>
                'unique_id': <Schema Column(name=unique_id, type=DataType(int64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=None,
            strict=False
            name=None,
            ordered=False
        )>

        This reclassifies an index (or indices) as a column (or columns).

        Similarly, to partially alter the index, pass the name of the column
        you would like to be removed to the ``level`` parameter, and you may
        also decide whether to drop the levels with the ``drop`` parameter.

        >>> example_schema = pa.DataFrameSchema({
        ...     "category" : pa.Column(str)},
        ...     index = pa.MultiIndex([
        ...         pa.Index(name="unique_id1", dtype=int),
        ...         pa.Index(name="unique_id2", dtype=str)
        ...         ]
        ...     )
        ... )
        >>> print(example_schema.reset_index(level = ["unique_id1"]))
        <Schema DataFrameSchema(
            columns={
                'category': <Schema Column(name=category, type=DataType(str))>
                'unique_id1': <Schema Column(name=unique_id1, type=DataType(int64))>
            },
            checks=[],
            coerce=False,
            dtype=None,
            index=<Schema Index(name=unique_id2, type=DataType(str))>,
            strict=False
            name=None,
            ordered=False
        )>

        .. seealso:: :func:`set_index`
        """
        # pylint: disable=import-outside-toplevel,cyclic-import
        from pandera.schema_components import Column, Index, MultiIndex

        new_schema = copy.deepcopy(self)

        if new_schema.index is None:
            raise errors.SchemaInitError(
                "There is currently no index set for this schema."
            )

        # ensure no duplicates
        level_temp: Union[List[Any], List[str]] = (
            list(set(level)) if level is not None else []
        )

        # ensure all specified keys are present in the index
        level_not_in_index: Union[List[Any], List[str], None] = (
            [x for x in level_temp if x not in new_schema.index.names]
            if isinstance(new_schema.index, MultiIndex) and level_temp
            else []
            if isinstance(new_schema.index, Index)
            and (level_temp == [new_schema.index.name])
            else level_temp
        )
        if level_not_in_index:
            raise errors.SchemaInitError(
                f"Keys {level_not_in_index} not found in schema columns!"
            )

        new_index = (
            None
            if not level_temp or isinstance(new_schema.index, Index)
            else new_schema.index.remove_columns(level_temp)
        )
        new_index = (
            new_index
            if new_index is None
            else Index(
                dtype=new_index.columns[list(new_index.columns)[0]].dtype,
                checks=new_index.columns[list(new_index.columns)[0]].checks,
                nullable=new_index.columns[
                    list(new_index.columns)[0]
                ].nullable,
                unique=new_index.columns[list(new_index.columns)[0]].unique,
                coerce=new_index.columns[list(new_index.columns)[0]].coerce,
                name=new_index.columns[list(new_index.columns)[0]].name,
            )
            if (len(list(new_index.columns)) == 1) and (new_index is not None)
            else None
            if (len(list(new_index.columns)) == 0) and (new_index is not None)
            else new_index
        )

        if not drop:
            additional_columns: Dict[str, Any] = (
                {col: new_schema.index.columns.get(col) for col in level_temp}
                if isinstance(new_schema.index, MultiIndex)
                else {new_schema.index.name: new_schema.index}
            )
            new_schema = new_schema.add_columns(
                {
                    k: Column(
                        dtype=v.dtype,
                        checks=v.checks,
                        nullable=v.nullable,
                        unique=v.unique,
                        coerce=v.coerce,
                        name=v.name,
                    )
                    for (k, v) in additional_columns.items()
                }
            )

        new_schema.index = new_index

        return new_schema

    @classmethod
    def __get_validators__(cls):
        yield cls._pydantic_validate

    @classmethod
    def _pydantic_validate(cls, schema: Any) -> "DataFrameSchema":
        """Verify that the input is a compatible DataFrameSchema."""
        if not isinstance(schema, cls):  # type: ignore
            raise TypeError(f"{schema} is not a {cls}.")
        return cast("DataFrameSchema", schema)


class SeriesSchemaBase:
    """Base series validator object."""

    @deprecate_pandas_dtype
    def __init__(
        self,
        dtype: PandasDtypeInputTypes = None,
        checks: CheckList = None,
        nullable: bool = False,
        unique: bool = False,
        allow_duplicates: Optional[bool] = None,
        coerce: bool = False,
        name: Any = None,
        pandas_dtype: PandasDtypeInputTypes = None,
        title: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """Initialize series schema base object.

        :param dtype: datatype of the column. If a string is specified,
            then assumes one of the valid pandas string values:
            http://pandas.pydata.org/pandas-docs/stable/basics.html#dtypes
        :param checks: If element_wise is True, then callable signature should
            be: ``Callable[Any, bool]`` where the ``Any`` input is a scalar
            element in the column. Otherwise, the input is assumed to be a
            pandas.Series object.
        :param nullable: Whether or not column can contain null values.
        :param unique: Whether or not column can contain duplicate
            values.
        :param allow_duplicates: Whether or not column can contain duplicate
            values.

            .. warning::
                This option will be deprecated in 0.8.0. Use the ``unique``
                argument instead.

        :param coerce: If True, when schema.validate is called the column will
            be coerced into the specified dtype. This has no effect on columns
            where ``dtype=None``.
        :param name: column name in dataframe to validate.
        :param pandas_dtype: alias of ``dtype`` for backwards compatibility.

            .. warning:: This option will be deprecated in 0.8.0

        :param title: A human-readable label for the series.
        :param description: An arbitrary textual description of the series.
        :type nullable: bool
        """
        if checks is None:
            checks = []
        if isinstance(checks, (Check, Hypothesis)):
            checks = [checks]

        if allow_duplicates is not None:
            warnings.warn(
                "The `allow_duplicates` will be deprecated in "
                "favor of the `unique` keyword. The value of "
                "`unique` will be set to the opposite of "
                "the `allow_duplicates` keyword.",
                DeprecationWarning,
            )
            unique = not allow_duplicates

        self.dtype = dtype or pandas_dtype  # type: ignore
        self._nullable = nullable
        self._coerce = coerce
        self._checks = checks
        self._name = name
        self._unique = unique
        self._title = title
        self._description = description

        for check in self.checks:
            if check.groupby is not None and not self._allow_groupby:
                raise errors.SchemaInitError(
                    f"Cannot use groupby checks with type {type(self)}"
                )

        # make sure pandas dtype is valid
        self.dtype  # pylint: disable=pointless-statement

        # this attribute is not meant to be accessed by users and is explicitly
        # set to True in the case that a schema is created by infer_schema.
        self._IS_INFERRED = False

    # the _is_inferred getter and setter methods are not public
    @property
    def _is_inferred(self):
        return self._IS_INFERRED

    @_is_inferred.setter
    def _is_inferred(self, value: bool):
        self._IS_INFERRED = value

    @property
    def checks(self):
        """Return list of checks or hypotheses."""
        return self._checks

    @checks.setter
    def checks(self, checks):
        self._checks = checks

    @_inferred_schema_guard
    def set_checks(self, checks: CheckList):
        """Create a new SeriesSchema with a new set of Checks

        :param checks: checks to set on the new schema
        :returns: a new SeriesSchema with a new set of checks
        """
        schema_copy = copy.deepcopy(self)
        schema_copy.checks = checks
        return schema_copy

    @property
    def nullable(self) -> bool:
        """Whether the series is nullable."""
        return self._nullable

    @property
    def unique(self) -> bool:
        """Whether to check for duplicates in check object"""
        return self._unique

    @unique.setter
    def unique(self, value: bool) -> None:
        """Set unique attribute"""
        self._unique = value

    @property
    def allow_duplicates(self) -> bool:
        """Whether to allow duplicate values."""
        return not self._unique

    @allow_duplicates.setter
    def allow_duplicates(self, value: bool) -> None:
        """Set allow_duplicates attribute."""
        self._unique = not value

    @property
    def coerce(self) -> bool:
        """Whether to coerce series to specified type."""
        return self._coerce

    @coerce.setter
    def coerce(self, value: bool) -> None:
        """Set coerce attribute."""
        self._coerce = value

    @property
    def name(self) -> Union[str, None]:
        """Get SeriesSchema name."""
        return self._name

    @property
    def title(self):
        """A human-readable label for the series."""
        return self._title

    @property
    def description(self):
        """An arbitrary textual description of the series."""
        return self._description

    @property
    def dtype(
        self,
    ) -> DataType:
        """Get the pandas dtype"""
        return self._dtype  # type: ignore

    @dtype.setter
    def dtype(self, value: PandasDtypeInputTypes) -> None:
        """Set the pandas dtype"""
        self._dtype = pandas_engine.Engine.dtype(value) if value else None

    def coerce_dtype(self, obj: Union[pd.Series, pd.Index]) -> pd.Series:
        """Coerce type of a pd.Series by type specified in dtype.

        :param pd.Series series: One-dimensional ndarray with axis labels
            (including time series).
        :returns: ``Series`` with coerced data type
        """
        if self.dtype is None:
            return obj

        try:
            return self.dtype.try_coerce(obj)
        except errors.ParserError as exc:
            msg = (
                f"Error while coercing '{self.name}' to type "
                f"{self.dtype}: {exc}:\n{exc.failure_cases}"
            )
            raise errors.SchemaError(
                self,
                obj,
                msg,
                failure_cases=exc.failure_cases,
                check=f"coerce_dtype('{self.dtype}')",
            ) from exc

    @property
    def _allow_groupby(self):
        """Whether the schema or schema component allows groupby operations."""
        raise NotImplementedError(  # pragma: no cover
            "The _allow_groupby property must be implemented by subclasses "
            "of SeriesSchemaBase"
        )

    def validate(
        self,
        check_obj: Union[pd.DataFrame, pd.Series],
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> Union[pd.DataFrame, pd.Series]:
        # pylint: disable=too-many-locals,too-many-branches,too-many-statements
        """Validate a series or specific column in dataframe.

        :param check_obj: pandas DataFrame or Series to validate.
        :param head: validate the first n rows. Rows overlapping with `tail` or
            `sample` are de-duplicated.
        :param tail: validate the last n rows. Rows overlapping with `head` or
            `sample` are de-duplicated.
        :param sample: validate a random sample of n rows. Rows overlapping
            with `head` or `tail` are de-duplicated.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates dataframe against all validation
            checks and raises a ``SchemaErrors``. Otherwise, raise
            ``SchemaError`` as soon as one occurs.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        :returns: validated DataFrame or Series.
        """
        if self._is_inferred:
            warnings.warn(
                f"This {type(self)} is an inferred schema that hasn't been "
                "modified. It's recommended that you refine the schema "
                "by calling `set_checks` before using it to validate data.",
                UserWarning,
            )

        error_handler = SchemaErrorHandler(lazy)

        if not inplace:
            check_obj = check_obj.copy()

        series = (
            check_obj
            if check_utils.is_field(check_obj)
            else check_obj[self.name]
        )

        series = _pandas_obj_to_validate(
            series, head, tail, sample, random_state
        )

        check_obj = _pandas_obj_to_validate(
            check_obj, head, tail, sample, random_state
        )

        if self.name is not None and series.name != self._name:
            msg = (
                f"Expected {type(self)} to have name '{self._name}', found "
                f"'{series.name}'"
            )
            error_handler.collect_error(
                "wrong_field_name",
                errors.SchemaError(
                    self,
                    check_obj,
                    msg,
                    failure_cases=scalar_failure_case(series.name),
                    check=f"field_name('{self._name}')",
                ),
            )

        if not self._nullable:
            nulls = series.isna()
            if nulls.sum() > 0:
                failed = series[nulls]
                msg = (
                    f"non-nullable series '{series.name}' contains null "
                    f"values:\n{failed}"
                )
                error_handler.collect_error(
                    "series_contains_nulls",
                    errors.SchemaError(
                        self,
                        check_obj,
                        msg,
                        failure_cases=reshape_failure_cases(
                            series[nulls], ignore_na=False
                        ),
                        check="not_nullable",
                    ),
                )

        # Check if the series contains duplicate values
        if self._unique:
            if type(series).__module__.startswith("databricks.koalas"):
                duplicates = (
                    series.to_frame().duplicated().reindex(series.index)
                )
                # pylint: disable=import-outside-toplevel
                import databricks.koalas as ks

                with ks.option_context("compute.ops_on_diff_frames", True):
                    failed = series[duplicates]
            else:
                duplicates = series.duplicated()
                failed = series[duplicates]

            if duplicates.any():
                msg = (
                    f"series '{series.name}' contains duplicate values:\n"
                    f"{failed}"
                )
                error_handler.collect_error(
                    "series_contains_duplicates",
                    errors.SchemaError(
                        self,
                        check_obj,
                        msg,
                        failure_cases=reshape_failure_cases(failed),
                        check="field_uniqueness",
                    ),
                )

        if self._dtype is not None and (
            not self._dtype.check(pandas_engine.Engine.dtype(series.dtype))
        ):
            msg = (
                f"expected series '{series.name}' to have type {self._dtype}, "
                + f"got {series.dtype}"
            )
            error_handler.collect_error(
                "wrong_dtype",
                errors.SchemaError(
                    self,
                    check_obj,
                    msg,
                    failure_cases=scalar_failure_case(str(series.dtype)),
                    check=f"dtype('{self.dtype}')",
                ),
            )

        check_results = []
        if check_utils.is_field(check_obj):
            check_obj, check_args = series, [None]
        else:
            check_args = [self.name]  # type: ignore

        for check_index, check in enumerate(self.checks):
            try:
                check_results.append(
                    _handle_check_results(
                        self, check_index, check, check_obj, *check_args
                    )
                )
            except errors.SchemaError as err:
                error_handler.collect_error("dataframe_check", err)
            except Exception as err:  # pylint: disable=broad-except
                # catch other exceptions that may occur when executing the
                # Check
                err_msg = f'"{err.args[0]}"' if len(err.args) > 0 else ""
                err_str = f"{err.__class__.__name__}({ err_msg})"
                msg = (
                    f"Error while executing check function: {err_str}\n"
                    + traceback.format_exc()
                )
                error_handler.collect_error(
                    "check_error",
                    errors.SchemaError(
                        self,
                        check_obj,
                        msg,
                        failure_cases=scalar_failure_case(err_str),
                        check=check,
                        check_index=check_index,
                    ),
                    original_exc=err,
                )

        if lazy and error_handler.collected_errors:
            raise errors.SchemaErrors(
                error_handler.collected_errors, check_obj
            )

        assert all(check_results)
        return check_obj

    def __call__(
        self,
        check_obj: Union[pd.DataFrame, pd.Series],
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> Union[pd.DataFrame, pd.Series]:
        """Alias for ``validate`` method."""
        return self.validate(
            check_obj, head, tail, sample, random_state, lazy, inplace
        )

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    @st.strategy_import_error
    def strategy(self, *, size=None):
        """Create a ``hypothesis`` strategy for generating a Series.

        :param size: number of elements to generate
        :returns: a strategy that generates pandas Series objects.
        """
        return st.series_strategy(
            self.dtype,
            checks=self.checks,
            nullable=self.nullable,
            unique=self.unique,
            name=self.name,
            size=size,
        )

    def example(self, size=None) -> pd.Series:
        """Generate an example of a particular size.

        :param size: number of elements in the generated Series.
        :returns: pandas Series object.
        """
        # pylint: disable=import-outside-toplevel,cyclic-import,import-error
        import hypothesis

        with warnings.catch_warnings():
            warnings.simplefilter(
                "ignore",
                category=hypothesis.errors.NonInteractiveExampleWarning,
            )
            return self.strategy(size=size).example()

    def __repr__(self):
        return (
            f"<Schema {self.__class__.__name__}"
            f"(name={self._name}, type={self.dtype!r})>"
        )

    @classmethod
    def __get_validators__(cls):
        yield cls._pydantic_validate

    @classmethod
    def _pydantic_validate(  # type: ignore
        cls: TSeriesSchemaBase, schema: Any
    ) -> TSeriesSchemaBase:
        """Verify that the input is a compatible DataFrameSchema."""
        if not isinstance(schema, cls):  # type: ignore
            raise TypeError(f"{schema} is not a {cls}.")
        return cast(TSeriesSchemaBase, schema)


class SeriesSchema(SeriesSchemaBase):
    """Series validator."""

    @deprecate_pandas_dtype
    def __init__(
        self,
        dtype: PandasDtypeInputTypes = None,
        checks: CheckList = None,
        index=None,
        nullable: bool = False,
        unique: bool = False,
        allow_duplicates: Optional[bool] = None,
        coerce: bool = False,
        name: str = None,
        pandas_dtype: PandasDtypeInputTypes = None,
        title: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """Initialize series schema base object.

        :param dtype: datatype of the column. If a string is specified,
            then assumes one of the valid pandas string values:
            http://pandas.pydata.org/pandas-docs/stable/basics.html#dtypes
        :param checks: If element_wise is True, then callable signature should
            be: ``Callable[Any, bool]`` where the ``Any`` input is a scalar
            element in the column. Otherwise, the input is assumed to be a
            pandas.Series object.
        :param index: specify the datatypes and properties of the index.
        :param nullable: Whether or not column can contain null values.
        :param unique: Whether or not column can contain duplicate
            values.
        :param allow_duplicates: Whether or not column can contain duplicate
            values.

            .. warning::
                This option will be deprecated in 0.8.0. Use the ``unique``
                argument instead.

        :param coerce: If True, when schema.validate is called the column will
            be coerced into the specified dtype. This has no effect on columns
            where ``pandas_dtype=None``.
        :param name: series name.
        :param pandas_dtype: alias of ``dtype`` for backwards compatibility.

            .. warning:: This option will be deprecated in 0.8.0

        :param title: A human-readable label for the series.
        :param description: An arbitrary textual description of the series.
        """
        super().__init__(
            dtype,
            checks,
            nullable,
            unique,
            allow_duplicates,
            coerce,
            name,
            pandas_dtype,
            title,
            description,
        )
        self.index = index

    @property
    def _allow_groupby(self) -> bool:
        """Whether the schema or schema component allows groupby operations."""
        return False

    def validate(
        self,
        check_obj: pd.Series,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> pd.Series:
        """Validate a Series object.

        :param check_obj: One-dimensional ndarray with axis labels
            (including time series).
        :param head: validate the first n rows. Rows overlapping with `tail` or
            `sample` are de-duplicated.
        :param tail: validate the last n rows. Rows overlapping with `head` or
            `sample` are de-duplicated.
        :param sample: validate a random sample of n rows. Rows overlapping
            with `head` or `tail` are de-duplicated.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates dataframe against all validation
            checks and raises a ``SchemaErrors``. Otherwise, raise
            ``SchemaError`` as soon as one occurs.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        :returns: validated Series.
        :raises SchemaError: when ``DataFrame`` violates built-in or custom
            checks.

        :example:

        >>> import pandas as pd
        >>> import pandera as pa
        >>>
        >>> series_schema = pa.SeriesSchema(
        ...     float, [
        ...         pa.Check(lambda s: s > 0),
        ...         pa.Check(lambda s: s < 1000),
        ...         pa.Check(lambda s: s.mean() > 300),
        ...     ])
        >>> series = pd.Series([1, 100, 800, 900, 999], dtype=float)
        >>> print(series_schema.validate(series))
        0      1.0
        1    100.0
        2    800.0
        3    900.0
        4    999.0
        dtype: float64
        """
        if not check_utils.is_field(check_obj):
            raise TypeError(f"expected pd.Series, got {type(check_obj)}")

        if hasattr(check_obj, "dask"):
            # special case for dask series
            if inplace:
                check_obj = check_obj.pandera.add_schema(self)
            else:
                check_obj = check_obj.copy()

            check_obj = check_obj.map_partitions(
                self._validate,
                head=head,
                tail=tail,
                sample=sample,
                random_state=random_state,
                lazy=lazy,
                inplace=inplace,
                meta=check_obj,
            )
            return check_obj.pandera.add_schema(self)

        return self._validate(
            check_obj=check_obj,
            head=head,
            tail=tail,
            sample=sample,
            random_state=random_state,
            lazy=lazy,
            inplace=inplace,
        )

    def _validate(
        self,
        check_obj: pd.Series,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> pd.Series:
        if not inplace:
            check_obj = check_obj.copy()

        if hasattr(check_obj, "pandera"):
            check_obj = check_obj.pandera.add_schema(self)

        error_handler = SchemaErrorHandler(lazy=lazy)

        if self.coerce:
            try:
                check_obj = self.coerce_dtype(check_obj)
                if hasattr(check_obj, "pandera"):
                    check_obj = check_obj.pandera.add_schema(self)
            except errors.SchemaError as exc:
                error_handler.collect_error("dtype_coercion_error", exc)

        # validate index
        if self.index:
            # coerce data type using index schema copy to prevent mutation
            # of original index schema attribute.
            _index = copy.deepcopy(self.index)
            _index.coerce = _index.coerce or self.coerce
            try:
                check_obj = _index(
                    check_obj, head, tail, sample, random_state, lazy, inplace
                )
            except errors.SchemaError as exc:
                error_handler.collect_error("dtype_coercion_error", exc)
            except errors.SchemaErrors as err:
                for schema_error_dict in err.schema_errors:
                    error_handler.collect_error(
                        "index_check", schema_error_dict["error"]
                    )

        # validate series
        try:
            super().validate(
                check_obj, head, tail, sample, random_state, lazy, inplace
            )
        except errors.SchemaErrors as err:
            for schema_error_dict in err.schema_errors:
                error_handler.collect_error(
                    "series_check", schema_error_dict["error"]
                )

        if error_handler.collected_errors:
            raise errors.SchemaErrors(
                error_handler.collected_errors, check_obj
            )

        return check_obj

    def __call__(
        self,
        check_obj: pd.Series,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> pd.Series:
        """Alias for :func:`SeriesSchema.validate` method."""
        return self.validate(
            check_obj, head, tail, sample, random_state, lazy, inplace
        )

    def __eq__(self, other):
        return self.__dict__ == other.__dict__


def _pandas_obj_to_validate(
    dataframe_or_series: Union[pd.DataFrame, pd.Series],
    head: Optional[int],
    tail: Optional[int],
    sample: Optional[int],
    random_state: Optional[int],
) -> Union[pd.DataFrame, pd.Series]:
    pandas_obj_subsample = []
    if head is not None:
        pandas_obj_subsample.append(dataframe_or_series.head(head))
    if tail is not None:
        pandas_obj_subsample.append(dataframe_or_series.tail(tail))
    if sample is not None:
        pandas_obj_subsample.append(
            dataframe_or_series.sample(sample, random_state=random_state)
            # ... (excerpt truncated here)
```
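
Because `_pandas_obj_to_validate` is private, you normally reach it through the `head`, `tail`, `sample`, and `random_state` parameters of `validate` (or the schema's `__call__` alias), whose signatures appear in the listing above. Here is a small usage sketch; the schema and data are made up for illustration:

```python
import pandas as pd
import pandera as pa

schema = pa.DataFrameSchema({"price": pa.Column(float, pa.Check.gt(0))})
df = pd.DataFrame({"price": [1.0, 2.5, 3.2, 10.0, 0.5]})

# validate() forwards head/tail/sample/random_state to
# _pandas_obj_to_validate, so checks run only on the subsampled rows.
validated = schema.validate(df, head=2, tail=2, sample=1, random_state=0)

# equivalent, via the __call__ alias:
validated = schema(df, head=2, tail=2, sample=1, random_state=0)
```

Note that `validate` still returns the full `check_obj`; subsampling only limits which rows the checks are evaluated against.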
