How to use test_step_time method in grail

Best Python code snippet using grail_python

main.py

Source:main.py Github

copy

Full Screen

# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
from time import time

import dllogger
import numpy as np
import torch
from absl import app, flags
from apex import amp

import dlrm.scripts.utils as utils
from dlrm.data.data_loader import get_data_loaders
from dlrm.data.utils import get_categorical_feature_sizes, prefetcher
from dlrm.model.single import Dlrm
from dlrm.utils.checkpointing.serial import SerialCheckpointWriter, make_serial_checkpoint_writer, \
    make_serial_checkpoint_loader

FLAGS = flags.FLAGS

# Basic run settings
flags.DEFINE_enum("mode", default='train', enum_values=['train', 'test', 'inference_benchmark'],
                  help="Select task to be performed")
flags.DEFINE_integer("seed", 12345, "Random seed")

# Training schedule flags
flags.DEFINE_integer("batch_size", 32768, "Batch size used for training")
flags.DEFINE_integer("test_batch_size", 32768, "Batch size used for testing/validation")
flags.DEFINE_float("lr", 28, "Base learning rate")
flags.DEFINE_integer("epochs", 1, "Number of epochs to train for")
flags.DEFINE_integer("max_steps", None, "Stop training after doing this many optimization steps")
flags.DEFINE_integer("warmup_factor", 0, "Learning rate warmup factor. Must be a non-negative integer")
flags.DEFINE_integer("warmup_steps", 6400, "Number of warmup optimization steps")
flags.DEFINE_integer("decay_steps", 80000, "Polynomial learning rate decay steps. If equal to 0 will not do any decaying")
flags.DEFINE_integer("decay_start_step", 64000,
                     "Optimization step after which to start decaying the learning rate, if None will start decaying right after the warmup phase is completed")
flags.DEFINE_integer("decay_power", 2, "Polynomial learning rate decay power")
flags.DEFINE_float("decay_end_lr", 0, "LR after the decay ends")

# Model configuration
flags.DEFINE_enum("embedding_type", "joint_fused", ["joint", "joint_fused", "joint_sparse", "multi_table"],
                  help="The type of the embedding operation to use")
flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of embedding space for categorical features")
flags.DEFINE_list("top_mlp_sizes", [1024, 1024, 512, 256, 1], "Linear layer sizes for the top MLP")
flags.DEFINE_list("bottom_mlp_sizes", [512, 256, 128], "Linear layer sizes for the bottom MLP")
flags.DEFINE_enum("interaction_op", default="cuda_dot", enum_values=["cuda_dot", "dot", "cat"],
                  help="Type of interaction operation to perform.")
flags.DEFINE_string(
    "dataset", None,
    "Full path to binary dataset. Must include files such as: train_data.bin, test_data.bin")
flags.DEFINE_enum("dataset_type", default="split", enum_values=['binary', 'split', 'synthetic_gpu', 'synthetic_disk'],
                  help='The type of the dataset to use')
flags.DEFINE_string("synthetic_dataset_dir", "/tmp/dlrm_sythetic_dataset", "Default synthetic disk dataset directory")
flags.DEFINE_list("synthetic_dataset_table_sizes", default=','.join(26 * [str(10**5)]),
                  help="Embedding table sizes to use with the synthetic dataset")
flags.DEFINE_integer("synthetic_dataset_num_entries", default=int(2**15 * 1024),  # 1024 batches by default
                     help="Number of samples per epoch for the synthetic dataset")
flags.DEFINE_boolean("shuffle_batch_order", False, "Read batch in train dataset by random order", short_name="shuffle")
flags.DEFINE_integer("num_numerical_features", 13,
                     "Number of numerical features in the dataset. Defaults to 13 for the Criteo Terabyte Dataset")
flags.DEFINE_integer("max_table_size", None,
                     "Maximum number of rows per embedding table, by default equal to the number of unique values for each categorical variable")
flags.DEFINE_boolean("hash_indices", False,
                     "If True the model will compute `index := index % table size` to ensure that the indices match table sizes")
flags.DEFINE_float("dataset_subset", None,
                   "Use only a subset of the training data. If None (default) will use all of it. Must be either None, or a float in range [0,1]")

# Checkpointing
flags.DEFINE_string("load_checkpoint_path", None, "Path from which to load a checkpoint")
flags.DEFINE_string("save_checkpoint_path", None, "Path to which to save the training checkpoints")

# Saving and logging flags
flags.DEFINE_string("output_dir", "/tmp", "Path where to save the checkpoints")
flags.DEFINE_string("log_path", "./log.json", "Destination for the log file with various results and statistics")
flags.DEFINE_integer("test_freq", None, "Number of optimization steps between validations. If None will test after each epoch")
flags.DEFINE_float("test_after", 0, "Don't test the model unless this many epochs has been completed")
flags.DEFINE_integer("print_freq", 200, "Number of optimizations steps between printing training status to stdout")
flags.DEFINE_integer("benchmark_warmup_steps", 0, "Number of initial iterations to exclude from throughput measurements")

# Machine setting flags
flags.DEFINE_string("base_device", "cuda", "Device to run the majority of the model operations")
flags.DEFINE_boolean("amp", False, "If True the script will use Automatic Mixed Precision")
flags.DEFINE_float("loss_scale", 1024, "Static loss scale for Mixed Precision Training")

# inference benchmark
flags.DEFINE_list("inference_benchmark_batch_sizes", default=[1, 64, 4096],
                  help="Batch sizes for inference throughput and latency measurements")
flags.DEFINE_integer("inference_benchmark_steps", 200,
                     "Number of steps for measuring inference latency and throughput")
flags.DEFINE_float("auc_threshold", None, "Stop the training after achieving this AUC")
flags.DEFINE_boolean("optimized_mlp", True, "Use an optimized implementation of MLP from apex")


def validate_flags():
    """Check flag combinations and downgrade CUDA-only options when running on CPU.

    Raises:
        ValueError: if ``max_table_size`` is set while ``hash_indices`` is False.
    """
    if FLAGS.max_table_size is not None and not FLAGS.hash_indices:
        raise ValueError('Hash indices must be True when setting a max_table_size')

    if FLAGS.base_device == 'cpu':
        if FLAGS.embedding_type in ('joint_fused', 'joint_sparse'):
            print('WARNING: CUDA joint embeddings are not supported on CPU')
            FLAGS.embedding_type = 'joint'

        if FLAGS.amp:
            print('WARNING: Automatic mixed precision not supported on CPU')
            FLAGS.amp = False

        if FLAGS.optimized_mlp:
            print('WARNING: Optimized MLP is not supported on CPU')
            FLAGS.optimized_mlp = False


def is_data_prefetching_enabled() -> bool:
    """Return True when the base device is exactly ``'cuda'``."""
    return FLAGS.base_device == 'cuda'


def create_model():
    """Build the Dlrm model from the flags and optionally restore a checkpoint.

    The MLP size flags are converted to lists of ints in place (they arrive as
    strings when given on the command line).

    Returns:
        The model, moved to ``FLAGS.base_device``.
    """
    print("Creating model")

    FLAGS.top_mlp_sizes = [int(s) for s in FLAGS.top_mlp_sizes]
    FLAGS.bottom_mlp_sizes = [int(s) for s in FLAGS.bottom_mlp_sizes]

    model_config = {
        'top_mlp_sizes': FLAGS.top_mlp_sizes,
        'bottom_mlp_sizes': FLAGS.bottom_mlp_sizes,
        'embedding_type': FLAGS.embedding_type,
        'embedding_dim': FLAGS.embedding_dim,
        'interaction_op': FLAGS.interaction_op,
        'categorical_feature_sizes': get_categorical_feature_sizes(FLAGS),
        'num_numerical_features': FLAGS.num_numerical_features,
        'hash_indices': FLAGS.hash_indices,
        'use_cpp_mlp': FLAGS.optimized_mlp,
        'fp16': FLAGS.amp,
        'base_device': FLAGS.base_device,
    }

    model = Dlrm.from_dict(model_config)
    print(model)
    model.to(FLAGS.base_device)

    if FLAGS.load_checkpoint_path is not None:
        checkpoint_loader = make_serial_checkpoint_loader(
            embedding_indices=range(len(get_categorical_feature_sizes(FLAGS))),
            device="cpu"
        )
        checkpoint_loader.load_checkpoint(model, FLAGS.load_checkpoint_path)
        # The checkpoint is loaded with device="cpu", so move the model back.
        model.to(FLAGS.base_device)

    return model


def main(argv):
    """Run the task selected by ``--mode``: train, test or inference_benchmark.

    Args:
        argv: positional command-line arguments (unused).
    """
    validate_flags()
    torch.manual_seed(FLAGS.seed)

    utils.init_logging(log_path=FLAGS.log_path)
    dllogger.log(data=FLAGS.flag_values_dict(), step='PARAMETER')

    data_loader_train, data_loader_test = get_data_loaders(FLAGS)

    # With AMP the loss is multiplied by loss_scale in train(), so the
    # learning rate is divided by the same factor here.
    scaled_lr = FLAGS.lr / FLAGS.loss_scale if FLAGS.amp else FLAGS.lr

    model = create_model()

    optimizer = torch.optim.SGD(model.parameters(), lr=scaled_lr)

    if FLAGS.amp and FLAGS.mode == 'train':
        (model.top_model, model.bottom_model.mlp), optimizer = amp.initialize(
            [model.top_model, model.bottom_model.mlp],
            optimizer, opt_level="O2", loss_scale=1)
    elif FLAGS.amp:
        model = model.half()

    loss_fn = torch.nn.BCEWithLogitsLoss(reduction="mean")

    if FLAGS.mode == 'test':
        loss, auc, test_step_time = evaluate(model, loss_fn, data_loader_test)

        # Evaluation batches come from the test loader, so throughput is
        # based on test_batch_size (same as the report at the end of train()).
        avg_test_throughput = FLAGS.test_batch_size / test_step_time
        results = {'auc': auc,
                   'avg_inference_latency': test_step_time,
                   'average_test_throughput': avg_test_throughput}
        dllogger.log(data=results, step=tuple())

        print(f"Finished testing. Test Loss {loss:.4f}, auc {auc:.4f}")
        return

    if FLAGS.mode == 'inference_benchmark':
        results = {}

        if FLAGS.amp:
            # can use pure FP16 for inference
            model = model.half()

        for batch_size in FLAGS.inference_benchmark_batch_sizes:
            batch_size = int(batch_size)
            # The data loaders are rebuilt so the test loader uses this batch size.
            FLAGS.test_batch_size = batch_size

            _, benchmark_data_loader = get_data_loaders(FLAGS)

            latencies = inference_benchmark(model=model, data_loader=benchmark_data_loader,
                                            num_batches=FLAGS.inference_benchmark_steps)

            print("All inference latencies: {}".format(latencies))

            mean_latency = np.mean(latencies)
            mean_inference_throughput = batch_size / mean_latency
            subresult = {f'mean_inference_latency_batch_{batch_size}': mean_latency,
                         f'mean_inference_throughput_batch_{batch_size}': mean_inference_throughput}
            results.update(subresult)
        dllogger.log(data=results, step=tuple())

        print("Finished inference benchmark.")
        return

    if FLAGS.mode == 'train':
        train(model, loss_fn, optimizer, data_loader_train, data_loader_test, scaled_lr)


def maybe_save_checkpoint(checkpoint_writer: SerialCheckpointWriter, model, path):
    """Save ``model`` to ``path`` and print how long it took; no-op if ``path`` is None."""
    if path is None:
        return

    print(f'Saving a checkpoint to {path}')

    begin = time()
    checkpoint_writer.save_checkpoint(model, path)
    end = time()
    print(f'Checkpoint saving took {end-begin:,.2f} [s]')


def _records_per_second(num_steps, elapsed_s):
    """Return training throughput in records/s, or 0.0 when no time has elapsed."""
    if elapsed_s <= 0:
        return 0.0
    return num_steps * FLAGS.batch_size / elapsed_s


def train(model, loss_fn, optimizer, data_loader_train, data_loader_test, scaled_lr):
    """Train and evaluate the model

    Args:
        model (dlrm):
        loss_fn (torch.nn.Module): Loss function
        optimizer (torch.nn.optim):
        data_loader_train (torch.utils.data.DataLoader):
        data_loader_test (torch.utils.data.DataLoader):
        scaled_lr (float): base learning rate passed to ``utils.lr_step``
    """
    model.train()
    prefetching_enabled = is_data_prefetching_enabled()
    print_freq = FLAGS.print_freq
    steps_per_epoch = len(data_loader_train)

    checkpoint_writer = make_serial_checkpoint_writer(
        embedding_indices=range(len(get_categorical_feature_sizes(FLAGS))),
        config=FLAGS.flag_values_dict()
    )

    # max(..., 1) keeps the modulo below valid for a single-batch loader.
    test_freq = FLAGS.test_freq if FLAGS.test_freq is not None else max(steps_per_epoch - 1, 1)

    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('loss', utils.SmoothedValue(window_size=1, fmt='{value:.4f}'))
    metric_logger.add_meter('step_time', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.4f}'))

    if prefetching_enabled:
        data_stream = torch.cuda.Stream()

    timer = utils.StepTimer()

    best_auc = 0
    best_epoch = 0
    global_step = 0          # stays 0 if the training loader yields nothing
    test_step_time = None    # set by the most recent evaluate() call, if any
    max_steps_reached = False
    start_time = time()

    timer.click()

    for epoch in range(FLAGS.epochs):
        input_pipeline = iter(data_loader_train)

        if prefetching_enabled:
            input_pipeline = prefetcher(input_pipeline, data_stream)

        for step, batch in enumerate(input_pipeline):
            global_step = steps_per_epoch * epoch + step
            numerical_features, categorical_features, click = batch

            utils.lr_step(optimizer, num_warmup_iter=FLAGS.warmup_steps, current_step=global_step + 1,
                          base_lr=scaled_lr, warmup_factor=FLAGS.warmup_factor,
                          decay_steps=FLAGS.decay_steps, decay_start_step=FLAGS.decay_start_step)

            if FLAGS.max_steps and global_step > FLAGS.max_steps:
                print(f"Reached max global steps of {FLAGS.max_steps}. Stopping.")
                max_steps_reached = True
                break

            if prefetching_enabled:
                torch.cuda.synchronize()

            output = model(numerical_features, categorical_features).squeeze().float()

            loss = loss_fn(output, click.squeeze())

            # Setting grad to None is faster than zero_grad()
            for param_group in optimizer.param_groups:
                for param in param_group['params']:
                    param.grad = None

            if FLAGS.amp:
                loss *= FLAGS.loss_scale
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            if step % print_freq == 0 and step > 0:
                loss_value = loss.item()

                timer.click()

                if global_step < FLAGS.benchmark_warmup_steps:
                    metric_logger.update(
                        loss=loss_value, lr=optimizer.param_groups[0]["lr"])
                else:
                    # Undo the static loss scaling so logged values are comparable
                    # between AMP and non-AMP runs.
                    unscale_factor = FLAGS.loss_scale if FLAGS.amp else 1
                    metric_logger.update(
                        loss=loss_value / unscale_factor,
                        step_time=timer.measured / FLAGS.print_freq,
                        lr=optimizer.param_groups[0]["lr"] * unscale_factor
                    )

                if global_step < FLAGS.benchmark_warmup_steps:
                    print(f'Warming up, step [{global_step}/{FLAGS.benchmark_warmup_steps}]')
                    continue

                eta_str = datetime.timedelta(seconds=int(metric_logger.step_time.global_avg * (steps_per_epoch - step)))
                metric_logger.print(
                    header=f"Epoch:[{epoch}/{FLAGS.epochs}] [{step}/{steps_per_epoch}] eta: {eta_str}")

            if (global_step % test_freq == 0 and global_step > 0 and
                    global_step / steps_per_epoch >= FLAGS.test_after):
                loss, auc, test_step_time = evaluate(model, loss_fn, data_loader_test)
                print(f"Epoch {epoch} step {step}. Test loss {loss:.5f}, auc {auc:.6f}")

                if auc > best_auc:
                    best_auc = auc
                    best_epoch = epoch + ((step + 1) / steps_per_epoch)
                    maybe_save_checkpoint(checkpoint_writer, model, FLAGS.save_checkpoint_path)

                if FLAGS.auc_threshold and auc >= FLAGS.auc_threshold:
                    elapsed_s = time() - start_time
                    run_time_s = int(elapsed_s)
                    print(f"Hit target accuracy AUC {FLAGS.auc_threshold} at epoch "
                          f"{global_step/steps_per_epoch:.2f} in {run_time_s}s. "
                          f"Average speed {_records_per_second(global_step, elapsed_s):.1f} records/s.")
                    return

        if max_steps_reached:
            # Leave the epoch loop too; otherwise the next epoch would start,
            # bump global_step and distort the speed reported below.
            break

    elapsed_s = time() - start_time
    run_time_s = int(elapsed_s)

    print(f"Finished training in {run_time_s}s. "
          f"Average speed {_records_per_second(global_step, elapsed_s):.1f} records/s.")

    avg_throughput = FLAGS.batch_size / metric_logger.step_time.avg

    results = {'best_auc': best_auc,
               'best_epoch': best_epoch,
               'average_train_throughput': avg_throughput}

    if test_step_time is not None:
        avg_test_throughput = FLAGS.test_batch_size / test_step_time
        results['average_test_throughput'] = avg_test_throughput

    dllogger.log(data=results, step=tuple())


def evaluate(model, loss_fn, data_loader):
    """Test dlrm model

    The model is switched to eval mode for the pass and back to train mode
    before returning.

    Args:
        model (dlrm):
        loss_fn (torch.nn.Module): Loss function
        data_loader (torch.utils.data.DataLoader):

    Returns:
        Tuple ``(loss, auc, step_time)`` taken from the loss meter's
        ``global_avg``, ``utils.roc_auc_score`` and the step-time meter's ``avg``.
    """
    model.eval()
    print_freq = FLAGS.print_freq
    prefetching_enabled = is_data_prefetching_enabled()

    steps_per_epoch = len(data_loader)
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('loss', utils.SmoothedValue(window_size=1, fmt='{avg:.4f}'))
    metric_logger.add_meter('step_time', utils.SmoothedValue(window_size=1, fmt='{avg:.4f}'))

    if prefetching_enabled:
        data_stream = torch.cuda.Stream()

    with torch.no_grad():
        y_true = []
        y_score = []

        timer = utils.StepTimer()
        timer.click()

        input_pipeline = iter(data_loader)

        if prefetching_enabled:
            input_pipeline = prefetcher(input_pipeline, data_stream)

        for step, (numerical_features, categorical_features, click) in enumerate(input_pipeline):
            if FLAGS.amp:
                numerical_features = numerical_features.half()

            if prefetching_enabled:
                torch.cuda.synchronize()

            output = model(numerical_features, categorical_features).squeeze()

            loss = loss_fn(output, click)
            y_true.append(click)
            y_score.append(output)

            loss_value = loss.item()
            timer.click()

            if timer.measured is not None:
                metric_logger.update(loss=loss_value, step_time=timer.measured)
                if step % print_freq == 0 and step > 0:
                    metric_logger.print(header=f"Test: [{step}/{steps_per_epoch}]")

        y_true = torch.cat(y_true)
        y_score = torch.cat(y_score)

        before_auc_timestamp = time()
        auc = utils.roc_auc_score(y_true=y_true, y_score=y_score)
        print(f'AUC computation took: {time() - before_auc_timestamp:.2f} [s]')

    model.train()

    return metric_logger.loss.global_avg, auc, metric_logger.step_time.avg


def inference_benchmark(model, data_loader, num_batches=100):
    """Measure per-batch forward-pass latency in seconds.

    Each timed step covers moving the inputs to ``FLAGS.base_device`` and the
    forward pass. Steps before ``FLAGS.benchmark_warmup_steps`` are run but
    not recorded.

    Args:
        model: model to benchmark; it is put into eval mode.
        data_loader: iterable of ``(numerical, categorical, click)`` batches.
        num_batches (int): iteration stops once ``step > num_batches``, so at
            most ``num_batches + 1`` batches are run.

    Returns:
        list of float latencies, one per recorded step.
    """
    model.eval()
    base_device = FLAGS.base_device
    # CUDA kernels run asynchronously, so wait for them before reading the
    # clock; on other devices there is nothing to wait for and
    # torch.cuda.synchronize() may not be available at all.
    sync_cuda = torch.device(base_device).type == 'cuda'
    latencies = []

    with torch.no_grad():
        for step, (numerical_features, categorical_features, click) in enumerate(data_loader):
            if step > num_batches:
                break

            step_start_time = time()

            numerical_features = numerical_features.to(base_device)
            if FLAGS.amp:
                numerical_features = numerical_features.half()

            categorical_features = categorical_features.to(device=base_device, dtype=torch.int64)

            _ = model(numerical_features, categorical_features).squeeze()
            if sync_cuda:
                torch.cuda.synchronize()
            step_time = time() - step_start_time

            if step >= FLAGS.benchmark_warmup_steps:
                latencies.append(step_time)
    return latencies


if __name__ == '__main__':
    ...  # entry-point body is truncated in the source listing

Full Screen

Full Screen

parse_html.py

Source:parse_html.py Github

copy

Full Screen

# -*- coding:utf-8 -*-
import re
import os
import pandas
from collections import OrderedDict
from lxml import etree


class EndReportParse(object):
    """Extract per-step information from an HTML report and save it as Excel.

    Every ``<blockquote>`` node of the report is treated as one test step.
    The collected rows are written to ``result/<file_name>.xlsx``.
    """

    # Column order of the exported sheet; also the key order of ``step_info``.
    COLUMNS = (
        'test_step_name',
        'seq_name',
        'test_step_status',
        'test_step_time',
        'module_path',
        'func_name',
        'sent_info',
    )

    def __init__(self, filepath):
        """
        :param filepath: path of the HTML report to parse
        """
        self.filepath = filepath  # HTML file path
        # The name used for Excel saving: last path component (either
        # separator style) up to the first '.html'.
        self.file_name = re.split(r'[\\/]', filepath)[-1].split('.html')[0]
        self.storage_folder = 'result'  # Storage folder name
        self.data = None
        self.step_info = None
        self.final_data = []

    def read_html_file(self):
        """Read html file data to string"""
        # NOTE(review): uses the platform default encoding; the report's real
        # encoding is not visible here - confirm before pinning one.
        with open(self.filepath, 'r') as rf:
            self.data = rf.read()

    @staticmethod
    def _first_text(node, path):
        """Return the stripped first result of ``node.xpath(path)``, or '' if none."""
        found = node.xpath(path)
        return found[0].strip() if found else ''

    def capture_step_info(self, html_node):
        """
        capture each step information and append it to ``self.final_data``

        Fields that cannot be found in the node are left as empty strings.

        :param html_node: the step's ``<blockquote>`` element
        :return: None
        """
        step_info = OrderedDict((column, '') for column in self.COLUMNS)
        self.step_info = step_info

        caption_texts = html_node.xpath('table/caption/span/text()')

        # Capture test_step_name (quoted name in the first caption text node)
        if caption_texts:
            name_match = re.search(r'Step "(.+?)"', caption_texts[0])
            if name_match:
                step_info['test_step_name'] = name_match.group(1).strip()

        # Capture test_step_status
        step_info['test_step_status'] = self._first_text(
            html_node, 'table/caption/span/strong/text()')

        # Capture test_step_time (second token of the second caption text node)
        if len(caption_texts) > 1:
            time_tokens = caption_texts[1].split()
            if len(time_tokens) > 1:
                step_info['test_step_time'] = time_tokens[1].strip()

        # Capture seq_name (only kept when the cell contains a '|')
        seq_name = self._first_text(html_node, 'table/tbody/tr[4]/td/text()')
        if '|' in seq_name:
            step_info['seq_name'] = seq_name

        # Capture module_path
        step_info['module_path'] = self._first_text(html_node, 'table/tbody/tr[5]/td/text()')

        # Capture func_name
        step_info['func_name'] = self._first_text(html_node, 'table/tbody/tr[6]/td/text()')

        # Capture sent_info: one "[connection: command]" entry per SENT row
        sent_entries = []
        for row in html_node.xpath('table/tbody/tr[10]/td/details/table/tbody/tr'):
            row_type = row.xpath('td[2]/text()')
            if not row_type or 'SENT' not in row_type[0].upper():
                continue
            conn_cells = row.xpath('td[3]/text()')
            command_cells = row.xpath('td[6]/text()')
            # A SENT row with an empty cell is still reported, with '' in its place.
            conn_name = conn_cells[0].split('|')[-1] if conn_cells else ''
            commands = command_cells[0] if command_cells else ''
            sent_entries.append('[{}: {}]'.format(conn_name, commands))
        step_info['sent_info'] = ', '.join(sent_entries)

        self.final_data.append(list(step_info.values()))

    def save_data(self):
        """Write data to excel"""
        os.makedirs(self.storage_folder, exist_ok=True)
        # Columns come from the class constant so a report without any step
        # still produces a sheet (with headers only).
        data = pandas.DataFrame(self.final_data, columns=list(self.COLUMNS))
        data.to_excel('{}/{}.xlsx'.format(self.storage_folder, self.file_name), index=False, sheet_name='Result')

    def parse(self):
        """Parsing HTML files"""
        html = etree.HTML(self.data)
        if html is None:
            # NOTE(review): presumably returned for content with no parsable
            # markup - nothing to capture in that case.
            return
        all_step = html.xpath('//blockquote')  # Capture all step node
        for each in all_step:
            self.capture_step_info(each)

    def main(self):
        """Read the report, parse every step and save the result."""
        self.read_html_file()
        self.parse()
        self.save_data()


def main():
    """Parse every ``*.html`` file found in the current working directory."""
    # Reads all HTML files in the current path
    html_file = [file_name for file_name in os.listdir(os.getcwd())
                 if file_name.endswith('.html')]
    for html in html_file:
        print('Start parsing the ({}) file'.format(html))
        report_parse = EndReportParse(filepath=html)
        report_parse.main()


if __name__ == '__main__':
    ...  # entry-point body is truncated in the source listing

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run grail automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful