How to use the remove_columns method in pandera

Best Python code snippets using pandera
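In pandera itself, remove_columns is a method on DataFrameSchema: it returns a new schema with the named columns dropped, leaving the original schema untouched. The snippets collected below mostly show the analogous pattern on pandas DataFrames and Hugging Face datasets. A minimal sketch of the pandera method, assuming a current pandera release (the column names are illustrative):

import pandera as pa

schema = pa.DataFrameSchema({
    "a": pa.Column(int),
    "b": pa.Column(float),
    "gid": pa.Column(str),
})

# remove_columns returns a new DataFrameSchema; the original is not modified
trimmed = schema.remove_columns(["gid"])
print(list(trimmed.columns))  # ['a', 'b']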

main_geda.py

Source: main_geda.py (GitHub)


# ... (snippet truncated above; these imports are implied by the code below)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ----------------------------------------
# GENERAL PLOTS FOR THE WHOLE DATASET
# Note: plots adapted from the seaborn gallery templates
# General correlation plot - Pearson
def remove_columns(df, col_name):
    return df.drop(columns=col_name)

def heatmap_corr_pearson(dataframe):
    '''
    Generates the heatmap of correlations between all variables of a pandas dataframe.
    '''
    sns.set(style="white")
    # remove anio, latitude, longitude, gid
    dataframe = remove_columns(dataframe, "anio")
    dataframe = remove_columns(dataframe, "latitude")
    dataframe = remove_columns(dataframe, "longitude")
    dataframe = remove_columns(dataframe, "gid")
    corr = dataframe.corr(method="pearson")  # compute the correlations between variables
    # Generate a mask for the upper triangle
    mask = np.zeros_like(corr, dtype=bool)  # np.bool is deprecated; the builtin bool works
    mask[np.triu_indices_from(mask)] = True
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap="YlGnBu", vmax=.3, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5})

# General correlation plot - Kendall
def heatmap_corr_kendall(dataframe):
    '''
    Generates the heatmap of correlations between all variables of a pandas dataframe.
    '''
    sns.set(style="white")
    dataframe = remove_columns(dataframe, "anio")
    dataframe = remove_columns(dataframe, "latitude")
    dataframe = remove_columns(dataframe, "longitude")
    dataframe = remove_columns(dataframe, "gid")
    corr = dataframe.corr(method="kendall")  # compute the correlations between variables
    # Generate a mask for the upper triangle
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap="YlGnBu", vmax=.3, center=0,
                square=True, linewidths=.5)  # cbar_kws={"shrink": .5}

# General correlation plot - Spearman
def heatmap_corr_spearman(dataframe):
    '''
    Generates the heatmap of correlations between all variables of a pandas dataframe.
    '''
    sns.set(style="white")
    dataframe = remove_columns(dataframe, "anio")
    dataframe = remove_columns(dataframe, "latitude")
    dataframe = remove_columns(dataframe, "longitude")
    dataframe = remove_columns(dataframe, "gid")
    corr = dataframe.corr(method="spearman")  # compute the correlations between variables
    # Generate a mask for the upper triangle
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap="YlGnBu", vmax=.3, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5})

# Plots relating categorical variables to numeric ones
def boxplot_category(x, y, df):
    sns.catplot(x=x, y=y, data=df, saturation=.5, kind="bar", aspect=.6)
    plt.show()

def plot_three_cat(x, y, z, df):
    g = sns.catplot(x=x, y=y, hue=z, data=df, height=10)
    g = sns.boxplot(x=x, y=y, data=df, whis=np.inf)
    # plt.show()

# Helpers
def df_numerico(df):
    '''
    Generates a dataframe containing only the numeric variables.
    Additionally removes latitude and longitude.
    '''
    df = remove_columns(df, "latitude")
    df = remove_columns(df, "longitude")
    for col in df.columns:
        # Fixed: the original test `!= ("float64" or "int64")` only ever
        # compared against "float64"; tuple membership is what was intended.
        if df[col].dtypes not in ("float64", "int64"):
            df = remove_columns(df, col)
    return df

def df_categorico(df):
    '''
    Generates a dataframe containing only the categorical variables.
    Additionally removes the anio variable.
    '''
    df = remove_columns(df, "anio")
    for col in df.columns:
        if df[col].dtypes in ("float64", "int64"):
            df = remove_columns(df, col)
# ... (snippet truncated here; a final `return df`, mirroring df_numerico, presumably follows)
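A short usage sketch for the helpers above; the DataFrame below is made-up data that simply contains the anio, latitude, longitude, and gid columns the functions expect to drop:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Illustrative data only; the real dataset behind main_geda.py is not shown
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "anio": [2018, 2019, 2020, 2021, 2022],
    "latitude": rng.normal(19, 1, 5),
    "longitude": rng.normal(-99, 1, 5),
    "gid": [1, 2, 3, 4, 5],
    "temperatura": rng.normal(20, 2, 5),
    "precipitacion": rng.normal(100, 10, 5),
})
heatmap_corr_pearson(df)  # drops the id-like columns, then plots the Pearson heatmap
plt.show()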


adapter_train.py

Source: adapter_train.py (GitHub)


from pathlib import Path
from transformers import TrainingArguments, default_data_collator
from transformers.adapters import BartAdapterModel
from datasets import load_dataset, Split, Dataset
from trainer.curriculum_adapter_trainer import CurriculumAdapterTrainer
from data.dataset.tokenize import tokenization, tokenizer
from data.dataset.data_augmentations import (
    flatten_conversation,
    mask_delta_beliefs,
    random_mask_beliefs,
    mask_context_belief_entities,
    random_mask_utterance,
)
from gpu import get_device
from utils import print_stage

def test_compute_metrics(eval_predictions):
    logits, hidden_values = eval_predictions.predictions
    print(tokenizer.batch_decode(logits.argmax(-1)))
    return {"score": 100}

def train():
    device, _ = get_device()
    name = "bart_finetune_cur"
    BATCH_SIZE = 8
    EPOCHS = 1
    data_dir = Path("resources/bart/")
    data_files = {
        Split.TRAIN: str((data_dir / "train.history_belief").absolute()),
        Split.VALIDATION: str((data_dir / "val.history_belief").absolute()),
        Split.TEST: str((data_dir / "test.history_belief").absolute()),
    }
    dataset = load_dataset(
        "data/dataset/multiwoz_dataset.py", data_files=data_files
    )
    print_stage("Flattening Conversation")
    dataset = dataset.map(
        flatten_conversation,
        batched=True,
        remove_columns=dataset["train"].column_names,
    )

    print_stage("Masking Difference of Dialogue States")
    masked_deltas = dataset["train"].map(
        mask_delta_beliefs, remove_columns="turn"
    )
    masked_deltas = masked_deltas.map(
        tokenization, batched=True, remove_columns=masked_deltas.column_names,
    )

    print_stage("Masking Beliefs (Easy)")
    random_masked_beliefs_easy = dataset["train"].map(
        lambda d: random_mask_beliefs(d, 0.15), remove_columns="turn"
    )
    random_masked_beliefs_easy = random_masked_beliefs_easy.map(
        tokenization,
        batched=True,
        remove_columns=random_masked_beliefs_easy.column_names,
    )

    print_stage("Masking Utterances (Easy)")
    random_masked_utterances_easy = dataset["train"].map(
        lambda d: random_mask_utterance(d, 0.15), remove_columns="turn"
    )
    random_masked_utterances_easy = random_masked_utterances_easy.map(
        tokenization,
        batched=True,
        remove_columns=random_masked_utterances_easy.column_names,
    )

    print_stage("Masking Belief Entities in the Context")
    masked_context_belief_entities = dataset["train"].map(
        mask_context_belief_entities, remove_columns="turn"
    )
    masked_context_belief_entities = masked_context_belief_entities.map(
        tokenization,
        batched=True,
        remove_columns=masked_context_belief_entities.column_names,
    )

    print_stage("Masking Beliefs (Hard)")
    random_masked_beliefs_hard = dataset["train"].map(
        lambda d: random_mask_beliefs(d, 0.5), remove_columns="turn"
    )
    random_masked_beliefs_hard = random_masked_beliefs_hard.map(
        tokenization,
        batched=True,
        remove_columns=random_masked_beliefs_hard.column_names,
    )

    print_stage("Masking Utterances (Hard)")
    random_masked_utterances_hard = dataset["train"].map(
        lambda d: random_mask_utterance(d, 0.5), remove_columns="turn"
    )
    random_masked_utterances_hard = random_masked_utterances_hard.map(
        tokenization,
        batched=True,
        remove_columns=random_masked_utterances_hard.column_names,
    )

    print_stage("Masking All Belief Values")
    masked_beliefs_final = dataset.map(
        lambda d: random_mask_beliefs(d, 1), remove_columns="turn"
    )
    masked_beliefs_final = masked_beliefs_final.map(
        tokenization,
        batched=True,
        remove_columns=masked_beliefs_final.column_names,  # this removes ['train'], ['val'], ['test']
    )
    # sample_dataset = Dataset.from_dict(masked_deltas["validation"][:2])
    # sample_dataset_2 = Dataset.from_dict(random_masked_beliefs_easy["validation"][50:55])
    # sample_dataset_3 = Dataset.from_dict(random_masked_utterances_easy["validation"][50:55])
    # sample_dataset_4 = Dataset.from_dict(masked_context_belief_entities["validation"][50:55])
    # train_set = sample_dataset.map(
    #     tokenization, batched=True, remove_columns=sample_dataset.column_names
    # )
    # # , remove_columns='turn')
    # train_set_2 = sample_dataset_2.map(
    #     tokenization,
    #     batched=True,
    #     remove_columns=sample_dataset_2.column_names,
    # )
    # train_set_3 = sample_dataset_3.map(
    #     tokenization,
    #     batched=True,
    #     remove_columns=sample_dataset_3.column_names,
    # )
    # train_set_4 = sample_dataset_4.map(
    #     tokenization,
    #     batched=True,
    #     remove_columns=sample_dataset_4.column_names,
    # )
    curriculum_datasets = [
        masked_deltas,
        random_masked_beliefs_easy,
        random_masked_utterances_easy,
        masked_context_belief_entities,
        random_masked_beliefs_hard,
        random_masked_utterances_hard,
    ]
    model = BartAdapterModel.from_pretrained(
        "facebook/bart-base"
    ).to(device)
    model.resize_token_embeddings(len(tokenizer))
    # add and activate adapter
    model.add_adapter("dst")
    model.train_adapter("dst")
    # setup trainer
    # same as huggingface trainer
    args = TrainingArguments(
        output_dir=f"checkpoints/{name}",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,  # smaller lr
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        dataloader_num_workers=0,
        local_rank=-1,
        load_best_model_at_end=True,
        # resume_from_checkpoint=f"{name}/checkpoint-19000",
    )
    data_collator = default_data_collator
    trainer = CurriculumAdapterTrainer(
        curriculum_datasets,
        model,
        args,
        train_dataset=masked_beliefs_final["train"],
        eval_dataset=masked_beliefs_final["validation"],
        data_collator=data_collator,
        # compute_metrics=test_compute_metrics
        # callbacks=[MyCallback],  # pass either the callback class or an instance of it (MyCallback())
    )
    trainer.curriculum_train()

if __name__ == "__main__":
    train()  # truncated in the original snippet; train() is the evident entry point
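The recurring pattern in this script is datasets.Dataset.map with the remove_columns argument: the columns are dropped after the mapped function has run, so the function can still read them while only its newly produced columns survive. A self-contained sketch on toy data (not the MultiWOZ files used above):

from datasets import Dataset

ds = Dataset.from_dict({
    "turn": ["hello there", "book a hotel"],
    "speaker": ["user", "user"],
})

# The lambda still sees "turn" and "speaker"; both are removed from the
# output, so the mapped dataset keeps only the new "n_tokens" column.
tokenized = ds.map(
    lambda row: {"n_tokens": len(row["turn"].split())},
    remove_columns=ds.column_names,
)
print(tokenized.column_names)  # ['n_tokens']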


training_dataloader.py

Source: training_dataloader.py (GitHub)


from datasets import load_dataset, concatenate_datasets  # imports implied by the truncated snippet

# ... (snippet truncated above; these are methods of a dataloader class)
        return self.load_to_data("train")

    def load_about_data(self, split):
        funpedia = load_dataset('md_gender_bias', 'funpedia', split=split)
        funpedia = funpedia.rename_column('gender', 'label')
        funpedia = funpedia.remove_columns("title")
        funpedia = funpedia.remove_columns("persona")
        funpedia = funpedia.filter(lambda row: row['label'] != 0)
        funpedia = funpedia.map(self.modifyAboutLables)
        # imageChat = load_dataset('md_gender_bias', 'image_chat', split=split)
        wizard = load_dataset('md_gender_bias', 'wizard', split=split)
        wizard = wizard.rename_column('gender', 'label')
        wizard = wizard.remove_columns("chosen_topic")
        wizard = wizard.filter(lambda row: row['label'] != 0)
        wizard = wizard.map(self.modifyAboutLables)
        print(funpedia.features.type)
        print(wizard.features.type)
        assert funpedia.features.type == wizard.features.type
        return concatenate_datasets([wizard, funpedia])

    def load_as_data(self, split):
        yelp = load_dataset('md_gender_bias', 'yelp_inferred', split=split)
        yelp = yelp.rename_column('binary_label', 'label')
        yelp = yelp.remove_columns("binary_score")
        yelp = yelp.filter(lambda row: row['label'] == 0)
        yelp = yelp.map(self.modifyAsLables)
        convai2 = load_dataset('md_gender_bias', 'convai2_inferred', split=split)
        convai2 = convai2.rename_column('binary_label', 'label')
        convai2 = convai2.remove_columns("binary_score")
        convai2 = convai2.remove_columns("ternary_score")
        convai2 = convai2.remove_columns("ternary_label")
        convai2 = convai2.filter(lambda row: row['label'] == 0)
        convai2 = convai2.map(self.modifyAsLables)
        assert convai2.features.type == yelp.features.type
        return concatenate_datasets([convai2, yelp])

    def load_to_data(self, split):
        light = load_dataset('md_gender_bias', 'light_inferred', split=split)
        light = light.rename_column('ternary_label', 'label')
        light = light.remove_columns("binary_score")
        light = light.remove_columns("ternary_score")
        light = light.remove_columns("binary_label")
        light = light.filter(lambda row: row['label'] != 2)
        openSub = load_dataset('md_gender_bias', 'opensubtitles_inferred', split=split)
        openSub = openSub.rename_column('ternary_label', 'label')
        openSub = openSub.remove_columns("binary_score")
        openSub = openSub.remove_columns("ternary_score")
        openSub = openSub.remove_columns("binary_label")
        openSub = openSub.filter(lambda row: row['label'] != 2)
        light = light.map(self.modifyToLables)
        openSub = openSub.map(self.modifyToLables)
        return concatenate_datasets([light, openSub])

    def modifyAboutLables(self, row):
        if row['label'] == 0:
            row['label'] = 6
        elif row['label'] == 1:
            row['label'] = 0
        else:
            row['label'] = 1
        return row

    def modifyAsLables(self, row):
        if row['label'] == 0:
# ... (snippet truncated here)
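Here remove_columns is used as a standalone Dataset method rather than as a map argument: it immediately returns a new dataset without the given column(s) and chains naturally with rename_column and filter. A small sketch on toy data mirroring the yelp_inferred shape above (values are made up):

from datasets import Dataset

yelp = Dataset.from_dict({
    "text": ["great food", "slow service"],
    "binary_label": [1, 0],
    "binary_score": [0.9, 0.2],
})
yelp = yelp.rename_column("binary_label", "label")
yelp = yelp.remove_columns("binary_score")  # also accepts a list of names
yelp = yelp.filter(lambda row: row["label"] == 0)
print(yelp.column_names)  # ['text', 'label']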


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub: right from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

