Best Python code snippet using autotest_python
bert_glove_coarse_fine_using_fine.py
Source:bert_glove_coarse_fine_using_fine.py  
import json
import torch
from torch.utils.data import TensorDataset
from transformers import BertTokenizer
import os
import sys
import pickle
from bert_class import BERTClass
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from util import plot_confusion_mat
import matplotlib.pyplot as plt
from bert_glove import train, test, bert_tokenize, create_data_loaders


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards.

    NOTE(review): ``pickle.load`` executes arbitrary code embedded in the file;
    only point this at artifacts produced by a trusted pipeline.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _load_json(path):
    """Parse and return the JSON document stored at ``path``, closing the file afterwards."""
    with open(path, "r") as handle:
        return json.load(handle)


def order_label_embeddings(label_embeddings, index_to_label, label_word_map, device):
    """Stack label embeddings in the iteration order of ``index_to_label``.

    For every index ``i`` in ``index_to_label`` the embedding is looked up as
    ``label_embeddings[label_word_map[index_to_label[i]]]``.

    Args:
        label_embeddings: mapping from a label word to its embedding vector
            (presumably array-like of equal length -- confirm against the
            producer of ``label_embeddings.pkl``).
        index_to_label: mapping from integer index to label name; its
            iteration order defines the row order of the result.
        label_word_map: mapping from label name to the word used as the key
            in ``label_embeddings``.
        device: torch device the resulting tensor is moved to.

    Returns:
        A tensor built with ``torch.tensor`` from the ordered embeddings and
        moved to ``device``.

    Raises:
        KeyError: if a label or its word is missing from the mappings.
    """
    ordered = [
        label_embeddings[label_word_map[index_to_label[i]]]
        for i in index_to_label
    ]
    return torch.tensor(ordered).to(device)


def make_train_test(df, children, label_word_map):
    """Split ``df`` by whether the ``text`` column mentions any child label word.

    The words ``label_word_map[ch]`` for ``ch`` in ``children`` are joined with
    ``|`` and used as a regular expression, so regex metacharacters in a word
    are interpreted as such.

    Args:
        df: DataFrame with a string ``text`` column.
        children: iterable of label names.
        label_word_map: mapping from label name to its representative word.

    Returns:
        ``(df_train, df_test)``: rows whose text matches the pattern and rows
        whose text does not, each with a fresh 0-based index.
    """
    reg_exp = "|".join(label_word_map[ch] for ch in children)
    # Evaluate the regex once and reuse the mask for both halves of the split.
    matches = df['text'].str.contains(reg_exp)
    df_train = df[matches].reset_index(drop=True)
    df_test = df[~matches].reset_index(drop=True)
    return df_train, df_test


if __name__ == "__main__":
    # Usage: <script> <use_gpu: 0|1> <gpu_id>
    # basepath = "/Users/dheerajmekala/Work/Coarse2Fine/data/"
    basepath = "/data4/dheeraj/coarse2fine/"
    dataset = "nyt/"
    pkl_dump_dir = basepath + dataset
    # tok_path = pkl_dump_dir + "bert/tokenizer_coarse_fine_using_fine"
    # model_path = pkl_dump_dir + "bert/model/"
    # model_name = "bert_vmf_coarse_fine_using_fine.pt"
    #
    # os.makedirs(tok_path, exist_ok=True)
    # os.makedirs(model_path, exist_ok=True)
    use_gpu = int(sys.argv[1])
    # use_gpu = False
    gpu_id = int(sys.argv[2])

    # Tell pytorch to run this model on the GPU.
    if use_gpu:
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device("cpu")

    df_fine = _load_pickle(pkl_dump_dir + "df_fine.pkl")
    parent_to_child = _load_pickle(pkl_dump_dir + "parent_to_child.pkl")
    # df_train, df_test = train_test_split(df_fine, test_size=0.1, stratify=df_fine["label"], random_state=42)

    # Sort the distinct labels so the label <-> index mapping is identical on
    # every run; iterating a bare set of strings is not stable across processes.
    fine_labels = sorted(set(df_fine.label.values))
    label_to_index = {}
    index_to_label = {}
    for i, l in enumerate(fine_labels):
        label_to_index[l] = i
        index_to_label[i] = l
    fine_inds = [label_to_index[f] for f in fine_labels]

    label_word_map = _load_json(pkl_dump_dir + "label_word_map_coarse_fine_using_fine.json")
    label_embeddings = _load_pickle(pkl_dump_dir + "label_embeddings.pkl")
    label_embeddings = order_label_embeddings(label_embeddings, index_to_label, label_word_map, device)
    # label_embeddings = create_label_embeddings(glove_dir, index_to_label, device, label_word_map)

    # Map every fine label index to the indices of all children of its parent
    # (the label itself included).
    sibling_map = {}
    for p in parent_to_child:
        children = parent_to_child[p]
        for ch in children:
            sibling_map[label_to_index[ch]] = [label_to_index[l] for l in children]

    print('Loading BERT tokenizer...', flush=True)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    input_ids, attention_masks, labels = bert_tokenize(tokenizer, df_fine, label_to_index)

    # Combine the training inputs into a TensorDataset.
    tensor_dataset = TensorDataset(input_ids, attention_masks, labels)

    # Create a 90-10 train-validation split.
    train_dataloader, validation_dataloader = create_data_loaders(tensor_dataset)

    model = BERTClass()
    model.to(device)

    add_args = {}
    add_args["possible_labels"] = fine_inds
    add_args["contrastive_map"] = sibling_map
    model = train(train_dataloader, validation_dataloader, model, label_embeddings, device, epochs=5,
                  additional_args=add_args)

    print("****************** CLASSIFICATION REPORT ON All Data ********************", flush=True)
    all_true = []
    all_preds = []
    # Evaluate one parent at a time, restricting predictions to that parent's children.
    for p in parent_to_child:
        children = parent_to_child[p]
        temp_df = df_fine[df_fine.label.isin(children)].reset_index(drop=True)
        children_ids = [label_to_index[l] for l in children]
        test_add_args = {}
        test_add_args["possible_labels"] = children_ids
        test_add_args["contrastive_map"] = None
        true, preds = test(temp_df, tokenizer, model, label_embeddings, device, label_to_index, index_to_label,
                           test_add_args)
        all_true += true
        all_preds += preds
    print(classification_report(all_true, all_preds), flush=True)
    print("*" * 80, flush=True)
    # tokenizer.save_pretrained(tok_path)
Source:experiment.py  
# experiment.py
# Marco Lui Feb 2011
#
# This class represents an experiment. Its intention is to act as the interface between the user
# and the underlying store at the level of managing tasks and results. The dataset abstraction is
# delegated to the DataProxy object.
from hydrat import config
from hydrat.datamodel import TaskSetResult, Result, BasicTask
from hydrat.common.pb import ProgressIter
import cPickle
import multiprocessing as mp

class ExperimentFold(object):
  """
  One fold of an experiment: a task paired with the learner to run on it.

  add_args maps keyword names to indexable values; each value is indexed with
  the task's train/test indices and passed on to the learner/classifier.
  """
  def __init__(self, task, learner, add_args=None):
    self.task = task
    self.learner = learner
    # Fresh dict per instance; a shared mutable default would leak state between folds.
    self.add_args = {} if add_args is None else add_args

  def __getstate__(self):
    # TODO: Potentially use a disk-backed implementation of tasks
    # Pickle a BasicTask built from the task rather than the task object itself.
    task = BasicTask.from_task(self.task)
    return {'task':task, 'learner':self.learner, 'add_args':self.add_args}

  @property
  def classifier(self):
    """ Train the learner on this fold's training data and return the classifier """
    train_add_args = dict( (k, v[self.task.train_indices]) for k,v in self.add_args.items())
    classifier = self.learner( self.task.train_vectors, self.task.train_classes,\
        sequence=self.task.train_sequence, indices=self.task.train_indices, **train_add_args)
    return classifier

  @property
  def result(self):
    """ Train a classifier, apply it to this fold's test data and return the Result """
    classifier = self.classifier
    test_add_args = dict( (k, v[self.task.test_indices]) for k,v in self.add_args.items())
    classifications = classifier( self.task.test_vectors,\
        sequence=self.task.test_sequence, indices=self.task.test_indices, **test_add_args)
    return Result.from_task(self.task, classifications, dict(classifier.metadata))

def get_result(fold):
  """
  Needed for parallelized implementation, as we need a top-level function to pickle.
  """
  return fold.result

# TODO: Refactor in a way that allows access to per-fold classifiers
class Experiment(TaskSetResult):
  """
  Runs a learner over every task in a taskset and exposes the per-fold results.

  parallel defaults to the 'parallel_classify' option of the 'parameters'
  config section when not given.
  """
  def __init__(self, taskset, learner=None, parallel=None):
    # TODO: Why is learner optional?
    self.taskset = taskset
    self.learner = learner
    self._results = None
    self.parallel = parallel if parallel is not None else config.getboolean('parameters', 'parallel_classify')

  @property
  def metadata(self):
    """ Result object metadata """
    result_metadata = dict(self.taskset.metadata)
    result_metadata['learner'] = self.learner.__name__
    result_metadata['learner_params'] = self.learner.params
    return result_metadata

  @property
  def results(self):
    """ Per-fold results; the experiment is run on first access """
    if self._results is None:
      self.run()
    return self._results

  @property
  def folds(self):
    """ A new list of ExperimentFold objects, one per task in the taskset """
    return [ExperimentFold(task, self.learner) for task in self.taskset]

  def _run_parallel(self):
    """
    Compute all fold results in a multiprocessing pool, sorted by metadata['index'].
    Raises whatever pickling the learner or running a fold raises.
    """
    # Fail early if the learner cannot be pickled.
    cPickle.dumps(self.learner)
    # TODO: closing a multiprocessing pool produces an unsightly error msg
    # TODO: it seems that explicitly closing it does not cause this error msg
    pool = mp.Pool(config.getint('parameters','job_count'))
    results = []
    completed = False
    try:
      for result in ProgressIter(pool.imap_unordered(get_result, self.folds), 'PARALLEL', maxval=len(self.taskset)):
        results.append(result)
      completed = True
    finally:
      # Always release the workers: close() after a clean run, terminate() after a failure.
      if completed:
        pool.close()
      else:
        pool.terminate()
      pool.join() # This waits for all the pool members to exit
    results.sort(key=lambda x:x.metadata['index'])
    return results

  def run(self, add_args = None):
    """
    Compute results for every fold and store them on the experiment.

    The parallel path is used when self.parallel is set and the learner reports
    itself pickleable; if it fails with UnpickleableError or TypeError, all
    folds are recomputed in series.

    NOTE(review): add_args is accepted but not used anywhere in this method.
    """
    # TODO: parallelize over folds?
    # TODO: Nicer in-progress output
    print("Experiment: %s %s" % (self.learner.__name__, self.taskset.metadata))
    results = None
    if self.parallel and self.learner.is_pickleable():
      try:
        results = self._run_parallel()
      except (cPickle.UnpickleableError, TypeError):
        # Discard any partial parallel output so the serial pass cannot duplicate folds.
        results = None
    if results is None:
      results = [fold.result for fold in ProgressIter(self.folds, 'SERIES')]
    self._results = results
Source:crawl_test.py  
...21            (["--random", "http://example.com"], True),22            ([], True),23        ],24    )25    def test_add_args(self, args, error):26        """test_add_args27        """28        parser = ArgumentParser()29        CrawlCommand().add_args(parser)30        if error:31            with pytest.raises(SystemExit):32                parser.parse_args(args)33        else:34            parsed_args = parser.parse_args(args)35            assert parsed_args.seed == args[1]36            if "--filename" in args or "-f" in args:37                assert parsed_args.filename == args[3]38            else:39                # Defaults to datetime format...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!
