How to use dataframe_check method in pandera

Best Python code snippet using pandera_python

taskflowdataschema.py

Source:taskflowdataschema.py Github

copy

Full Screen

"""
Schema repository for task flow data and task flow data subsets.
"""
from pandera.typing import Series, String
import pandas as pd
import pandera as pa
from cdadagbuilder.genflow.utils.check import (
    check_empty_df,
    invalid_seq_number,
)


class TaskFlowDataSchema(pa.SchemaModel):
    """
    Pandera schema model holding the field validations for the
    task flow data dataframe loaded from the configuration files.
    """

    # Every column is coerced to its annotated dtype and must be non-null.
    variabletablegroupname: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    # Workflow step names must be unique across the dataframe.
    variableworkflowstepname: Series[String] = pa.Field(
        nullable=False, coerce=True, allow_duplicates=False
    )
    variableworkflowstepquerytype: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    variableworkflowstepexecutionorder: Series[int] = pa.Field(
        nullable=False, coerce=True
    )
    variableworkflowstepschema: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    variableworkflowstepquery: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    workflowstepqueryparameters: Series[String] = pa.Field(
        nullable=False, coerce=True
    )

    # pylint: disable=R0201
    @pa.dataframe_check
    def validate_taskflow_data_dataframe(
        self, task_flow_data: pd.DataFrame
    ) -> bool:
        """
        Fail validation when the task flow data dataframe is empty.

        :param task_flow_data: dataframe under validation
        :type task_flow_data: DataFrame
        :return: True when ``check_empty_df`` returns 0 (non-empty)
        """
        _indicator = check_empty_df(task_flow_data)
        return _indicator == 0


class TaskFlowDataSubsetSchema(pa.SchemaModel):
    """
    Pandera schema model holding the field validations for the
    task flow data subset dataframe loaded from the configuration files.
    """

    # Workflow step names must be unique across the dataframe.
    variableworkflowstepname: Series[String] = pa.Field(
        nullable=False, coerce=True, allow_duplicates=False
    )
    variableworkflowstepquerytype: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    # Execution order is a positive sequence number (>= 1).
    variableworkflowstepexecutionorder: Series[int] = pa.Field(
        nullable=False, coerce=True, ge=1
    )
    variableworkflowstepschema: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    variableworkflowstepquery: Series[String] = pa.Field(
        nullable=False, coerce=True
    )
    workflowstepqueryparameters: Series[String] = pa.Field(
        nullable=False, coerce=True
    )

    # pylint: disable=R0201
    @pa.dataframe_check
    def validate_taskflow_data_subset_dataframe(
        self, task_flow_data_subset: pd.DataFrame
    ) -> bool:
        """
        Fail validation when the task flow data subset dataframe is empty.

        :param task_flow_data_subset: dataframe under validation
        :type task_flow_data_subset: DataFrame
        :return: True when ``check_empty_df`` returns 0 (non-empty)
        """
        _indicator = check_empty_df(task_flow_data_subset)
        return _indicator == 0

    # pylint: disable=R0201
    @pa.dataframe_check
    def validate_task_step_buildorder(
        self, task_flow_data_subset: pd.DataFrame
    ) -> bool:
        """
        Detect missing task step build sequence numbers configured in
        the tasks data config file.

        :param task_flow_data_subset: dataframe under validation
        :type task_flow_data_subset: DataFrame
        :return: True when ``invalid_seq_number`` reports no gaps
        """
        _taskflow_step_buildorder = task_flow_data_subset[
            "variableworkflowstepexecutionorder"
        ].tolist()
        _invalid_num = invalid_seq_number(_taskflow_step_buildorder)
        return _invalid_num == 0

    # pylint: disable=R0201
    @pa.dataframe_check
    def validate_min_task_step_buildorder(
        self, task_flow_data_subset: pd.DataFrame
    ) -> bool:
        """
        The minimum build order has to be 1.

        :param task_flow_data_subset: dataframe under validation
        :type task_flow_data_subset: DataFrame
        """
        _taskflow_step_buildorder = task_flow_data_subset[
            "variableworkflowstepexecutionorder"
        ].tolist()
        _min_build_order = min(_taskflow_step_buildorder)
        # NOTE(review): the scraped source is truncated here ("...");
        # the remainder of this method — presumably
        # `return _min_build_order == 1` — is not visible and has not
        # been reconstructed. Confirm against the original repository.

Full Screen

Full Screen

match_kingdoms.py

Source:match_kingdoms.py Github

copy

Full Screen

1import os2import random3import numpy as np4import pandas as pd56tsk1 = []7import pandas as pd8import csv9from datetime import datetime,timedelta1011path='C:\\Users\\Administrator\\Desktop\\sumup1.csv' # 获取文件夹的路径12data = pd.read_csv(path,error_bad_lines=False)1314path='C:\\Users\\Administrator\\Desktop\\check.csv' # 获取文件夹的路径15data_check = pd.read_csv(path,error_bad_lines=False)1617dataframe_1=pd.DataFrame(data)1819print(len(data),len(data_check))20new=[]21dataframe_check=pd.DataFrame(data_check)22for i in range(len(data)):23 ind=024 for j in range(len(data_check)):25 if dataframe_1['city'][i]==dataframe_check['LAD20NM'][j]:26 ind=j27 break28 new.append(dataframe_check['LAD20CD'][ind])29dataframe_1['region']=new ...

Full Screen

Full Screen

correlation_matrix.py

Source:correlation_matrix.py Github

copy

Full Screen

1# /usr/bin/env python2import pandas as pd3def dataframe_check(mat):4 if type(mat) != pd.core.frame.DataFrame:5 raise Exception("argument mat must be a pandas DataFrame")6 7def normalize(mat):8 dataframe_check(mat)9 mat = mat.sub(mat.mean(axis = 1), axis = 0)10 mat = mat.div((mat * mat).sum(axis = 1)**(1 / 2), axis = 0)11 return mat12def pearson_corr_mat(mat):13 dataframe_check(mat)14 mat_norm = normalize(mat)15 return mat_norm.dot(mat_norm.transpose())16def spearman_corr_mat(mat):17 dataframe_check(mat)18 mat_rank = mat.rank(axis = 1)...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You could also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful