How to use test_step_time method in grail

Best Python code snippet using grail_python

main.py

Source:main.py

# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
from time import time

import dllogger
import numpy as np
import torch
from absl import app, flags
from apex import amp

import dlrm.scripts.utils as utils
from dlrm.data.data_loader import get_data_loaders
from dlrm.data.utils import get_categorical_feature_sizes, prefetcher
from dlrm.model.single import Dlrm
from dlrm.utils.checkpointing.serial import SerialCheckpointWriter, make_serial_checkpoint_writer, \
    make_serial_checkpoint_loader

FLAGS = flags.FLAGS

# Basic run settings
flags.DEFINE_enum("mode", default='train', enum_values=['train', 'test', 'inference_benchmark'],
                  help="Select task to be performed")
flags.DEFINE_integer("seed", 12345, "Random seed")

# Training schedule flags
flags.DEFINE_integer("batch_size", 32768, "Batch size used for training")
flags.DEFINE_integer("test_batch_size", 32768, "Batch size used for testing/validation")
flags.DEFINE_float("lr", 28, "Base learning rate")
flags.DEFINE_integer("epochs", 1, "Number of epochs to train for")
flags.DEFINE_integer("max_steps", None, "Stop training after doing this many optimization steps")
flags.DEFINE_integer("warmup_factor", 0, "Learning rate warmup factor. Must be a non-negative integer")
flags.DEFINE_integer("warmup_steps", 6400, "Number of warmup optimization steps")
flags.DEFINE_integer("decay_steps", 80000, "Polynomial learning rate decay steps. If equal to 0 will not do any decaying")
flags.DEFINE_integer("decay_start_step", 64000,
    "Optimization step after which to start decaying the learning rate, if None will start decaying right after the warmup phase is completed")
flags.DEFINE_integer("decay_power", 2, "Polynomial learning rate decay power")
flags.DEFINE_float("decay_end_lr", 0, "LR after the decay ends")

# Model configuration
flags.DEFINE_enum("embedding_type", "joint_fused", ["joint", "joint_fused", "joint_sparse", "multi_table"],
                  help="The type of the embedding operation to use")
flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of embedding space for categorical features")
flags.DEFINE_list("top_mlp_sizes", [1024, 1024, 512, 256, 1], "Linear layer sizes for the top MLP")
flags.DEFINE_list("bottom_mlp_sizes", [512, 256, 128], "Linear layer sizes for the bottom MLP")
flags.DEFINE_enum("interaction_op", default="cuda_dot", enum_values=["cuda_dot", "dot", "cat"],
                  help="Type of interaction operation to perform.")
flags.DEFINE_string(
    "dataset", None,
    "Full path to binary dataset. Must include files such as: train_data.bin, test_data.bin")
flags.DEFINE_enum("dataset_type", default="split", enum_values=['binary', 'split', 'synthetic_gpu', 'synthetic_disk'],
                  help='The type of the dataset to use')
flags.DEFINE_string("synthetic_dataset_dir", "/tmp/dlrm_sythetic_dataset", "Default synthetic disk dataset directory")
flags.DEFINE_list("synthetic_dataset_table_sizes", default=','.join(26 * [str(10**5)]),
                  help="Embedding table sizes to use with the synthetic dataset")
flags.DEFINE_integer("synthetic_dataset_num_entries", default=int(2**15 * 1024), # 1024 batches by default
                     help="Number of samples per epoch for the synthetic dataset")
flags.DEFINE_boolean("shuffle_batch_order", False, "Read batch in train dataset by random order", short_name="shuffle")
flags.DEFINE_integer("num_numerical_features", 13,
                     "Number of numerical features in the dataset. Defaults to 13 for the Criteo Terabyte Dataset")
flags.DEFINE_integer("max_table_size", None,
                     "Maximum number of rows per embedding table, by default equal to the number of unique values for each categorical variable")
flags.DEFINE_boolean("hash_indices", False,
                     "If True the model will compute `index := index % table size` to ensure that the indices match table sizes")
flags.DEFINE_float("dataset_subset", None,
     "Use only a subset of the training data. If None (default) will use all of it. Must be either None, or a float in range [0,1]")

# Checkpointing
flags.DEFINE_string("load_checkpoint_path", None, "Path from which to load a checkpoint")
flags.DEFINE_string("save_checkpoint_path", None, "Path to which to save the training checkpoints")

# Saving and logging flags
flags.DEFINE_string("output_dir", "/tmp", "Path where to save the checkpoints")
flags.DEFINE_string("log_path", "./log.json", "Destination for the log file with various results and statistics")
flags.DEFINE_integer("test_freq", None, "Number of optimization steps between validations. If None will test after each epoch")
flags.DEFINE_float("test_after", 0, "Don't test the model unless this many epochs has been completed")
flags.DEFINE_integer("print_freq", 200, "Number of optimizations steps between printing training status to stdout")
flags.DEFINE_integer("benchmark_warmup_steps", 0, "Number of initial iterations to exclude from throughput measurements")

# Machine setting flags
flags.DEFINE_string("base_device", "cuda", "Device to run the majority of the model operations")
flags.DEFINE_boolean("amp", False, "If True the script will use Automatic Mixed Precision")
flags.DEFINE_float("loss_scale", 1024, "Static loss scale for Mixed Precision Training")

# inference benchmark
flags.DEFINE_list("inference_benchmark_batch_sizes", default=[1, 64, 4096],
                  help="Batch sizes for inference throughput and latency measurements")
flags.DEFINE_integer("inference_benchmark_steps", 200,
                     "Number of steps for measuring inference latency and throughput")
flags.DEFINE_float("auc_threshold", None, "Stop the training after achieving this AUC")
flags.DEFINE_boolean("optimized_mlp", True, "Use an optimized implementation of MLP from apex")


def validate_flags():
    """Check flag combinations and downgrade CUDA-only options on CPU.

    Raises:
        ValueError: if ``max_table_size`` is set while ``hash_indices`` is False.
    """
    if FLAGS.max_table_size is not None and not FLAGS.hash_indices:
        raise ValueError('Hash indices must be True when setting a max_table_size')

    if FLAGS.base_device == 'cpu':
        if FLAGS.embedding_type in ('joint_fused', 'joint_sparse'):
            print('WARNING: CUDA joint embeddings are not supported on CPU')
            FLAGS.embedding_type = 'joint'
        if FLAGS.amp:
            print('WARNING: Automatic mixed precision not supported on CPU')
            FLAGS.amp = False
        if FLAGS.optimized_mlp:
            print('WARNING: Optimized MLP is not supported on CPU')
            FLAGS.optimized_mlp = False


def is_data_prefetching_enabled() -> bool:
    """Return True when the base device is exactly 'cuda'."""
    return FLAGS.base_device == 'cuda'


def create_model():
    """Build the Dlrm model from FLAGS, move it to the base device and
    optionally load a checkpoint.

    Side effect: ``FLAGS.top_mlp_sizes`` and ``FLAGS.bottom_mlp_sizes`` are
    rewritten as lists of ints.
    """
    print("Creating model")
    # List flags given on the command line arrive as strings.
    FLAGS.top_mlp_sizes = [int(s) for s in FLAGS.top_mlp_sizes]
    FLAGS.bottom_mlp_sizes = [int(s) for s in FLAGS.bottom_mlp_sizes]

    categorical_feature_sizes = get_categorical_feature_sizes(FLAGS)
    model_config = {
        'top_mlp_sizes': FLAGS.top_mlp_sizes,
        'bottom_mlp_sizes': FLAGS.bottom_mlp_sizes,
        'embedding_type': FLAGS.embedding_type,
        'embedding_dim': FLAGS.embedding_dim,
        'interaction_op': FLAGS.interaction_op,
        'categorical_feature_sizes': categorical_feature_sizes,
        'num_numerical_features': FLAGS.num_numerical_features,
        'hash_indices': FLAGS.hash_indices,
        'use_cpp_mlp': FLAGS.optimized_mlp,
        'fp16': FLAGS.amp,
        'base_device': FLAGS.base_device,
    }
    model = Dlrm.from_dict(model_config)
    print(model)
    model.to(FLAGS.base_device)

    if FLAGS.load_checkpoint_path is not None:
        checkpoint_loader = make_serial_checkpoint_loader(
            embedding_indices=range(len(categorical_feature_sizes)),
            device="cpu"
        )
        checkpoint_loader.load_checkpoint(model, FLAGS.load_checkpoint_path)
        model.to(FLAGS.base_device)

    return model


def main(argv):
    """Entry point: run training, testing or the inference benchmark
    depending on ``FLAGS.mode``.

    Args:
        argv: positional command-line arguments (unused here).
    """
    validate_flags()
    torch.manual_seed(FLAGS.seed)
    utils.init_logging(log_path=FLAGS.log_path)
    dllogger.log(data=FLAGS.flag_values_dict(), step='PARAMETER')

    data_loader_train, data_loader_test = get_data_loaders(FLAGS)

    # With AMP the loss is multiplied by loss_scale in train(), so the
    # learning rate is divided by the same factor here.
    scaled_lr = FLAGS.lr / FLAGS.loss_scale if FLAGS.amp else FLAGS.lr

    model = create_model()
    optimizer = torch.optim.SGD(model.parameters(), lr=scaled_lr)

    if FLAGS.amp and FLAGS.mode == 'train':
        (model.top_model, model.bottom_model.mlp), optimizer = amp.initialize([model.top_model, model.bottom_model.mlp],
                                                                              optimizer, opt_level="O2", loss_scale=1)
    elif FLAGS.amp:
        model = model.half()

    loss_fn = torch.nn.BCEWithLogitsLoss(reduction="mean")

    if FLAGS.mode == 'test':
        loss, auc, test_step_time = evaluate(model, loss_fn, data_loader_test)
        # Evaluation batches are sized by test_batch_size, so throughput must
        # use it as well (train() already does for the same metric).
        avg_test_throughput = FLAGS.test_batch_size / test_step_time
        results = {'auc': auc,
                   'avg_inference_latency': test_step_time,
                   'average_test_throughput': avg_test_throughput}
        dllogger.log(data=results, step=tuple())
        print(f"Finished testing. Test Loss {loss:.4f}, auc {auc:.4f}")
        return

    if FLAGS.mode == 'inference_benchmark':
        results = {}
        if FLAGS.amp:
            # can use pure FP16 for inference
            model = model.half()
        for batch_size in FLAGS.inference_benchmark_batch_sizes:
            batch_size = int(batch_size)
            # The data loaders read the batch size from FLAGS.
            FLAGS.test_batch_size = batch_size
            _, benchmark_data_loader = get_data_loaders(FLAGS)
            latencies = inference_benchmark(model=model, data_loader=benchmark_data_loader,
                                            num_batches=FLAGS.inference_benchmark_steps)
            print("All inference latencies: {}".format(latencies))
            mean_latency = np.mean(latencies)
            mean_inference_throughput = batch_size / mean_latency
            subresult = {f'mean_inference_latency_batch_{batch_size}': mean_latency,
                         f'mean_inference_throughput_batch_{batch_size}': mean_inference_throughput}
            results.update(subresult)
        dllogger.log(data=results, step=tuple())
        print("Finished inference benchmark.")
        return

    if FLAGS.mode == 'train':
        train(model, loss_fn, optimizer, data_loader_train, data_loader_test, scaled_lr)


def maybe_save_checkpoint(checkpoint_writer: SerialCheckpointWriter, model, path):
    """Save ``model`` to ``path`` and print how long it took; no-op when
    ``path`` is None."""
    if path is None:
        return
    print(f'Saving a checkpoint to {path}')
    begin = time()
    checkpoint_writer.save_checkpoint(model, path)
    end = time()
    print(f'Checkpoint saving took {end-begin:,.2f} [s]')


def _average_speed(num_steps, batch_size, run_time_s):
    """Return records processed per second, or 0.0 when no whole second elapsed."""
    if run_time_s <= 0:
        return 0.0
    return num_steps * batch_size / run_time_s


def train(model, loss_fn, optimizer, data_loader_train, data_loader_test, scaled_lr):
    """Train and evaluate the model

    Args:
        model (dlrm):
        loss_fn (torch.nn.Module): Loss function
        optimizer (torch.nn.optim):
        data_loader_train (torch.utils.data.DataLoader):
        data_loader_test (torch.utils.data.DataLoader):
        scaled_lr (float): base learning rate handed to ``utils.lr_step``
    """
    model.train()
    prefetching_enabled = is_data_prefetching_enabled()
    print_freq = FLAGS.print_freq
    steps_per_epoch = len(data_loader_train)

    checkpoint_writer = make_serial_checkpoint_writer(
        embedding_indices=range(len(get_categorical_feature_sizes(FLAGS))),
        config=FLAGS.flag_values_dict()
    )

    if FLAGS.test_freq is not None:
        test_freq = FLAGS.test_freq
    else:
        # Clamp to 1 so a single-step epoch does not cause a modulo by zero.
        test_freq = max(1, steps_per_epoch - 1)

    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('loss', utils.SmoothedValue(window_size=1, fmt='{value:.4f}'))
    metric_logger.add_meter('step_time', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.4f}'))

    if prefetching_enabled:
        data_stream = torch.cuda.Stream()

    timer = utils.StepTimer()

    best_auc = 0
    best_epoch = 0
    # Explicit sentinels: both are read after the loops even when a loop
    # body never ran (empty loader / no evaluation triggered).
    global_step = 0
    test_step_time = None
    max_steps_reached = False
    start_time = time()

    timer.click()

    for epoch in range(FLAGS.epochs):
        input_pipeline = iter(data_loader_train)
        if prefetching_enabled:
            input_pipeline = prefetcher(input_pipeline, data_stream)

        for step, batch in enumerate(input_pipeline):
            global_step = steps_per_epoch * epoch + step

            # global_step counts completed optimization steps at this point,
            # so ">=" stops after exactly max_steps of them.
            if FLAGS.max_steps and global_step >= FLAGS.max_steps:
                print(f"Reached max global steps of {FLAGS.max_steps}. Stopping.")
                max_steps_reached = True
                break

            numerical_features, categorical_features, click = batch

            utils.lr_step(optimizer, num_warmup_iter=FLAGS.warmup_steps, current_step=global_step + 1,
                          base_lr=scaled_lr, warmup_factor=FLAGS.warmup_factor,
                          decay_steps=FLAGS.decay_steps, decay_start_step=FLAGS.decay_start_step)

            if prefetching_enabled:
                torch.cuda.synchronize()

            output = model(numerical_features, categorical_features).squeeze().float()
            loss = loss_fn(output, click.squeeze())

            # Setting grad to None is faster than zero_grad()
            for param_group in optimizer.param_groups:
                for param in param_group['params']:
                    param.grad = None

            if FLAGS.amp:
                loss *= FLAGS.loss_scale
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            if step % print_freq == 0 and step > 0:
                loss_value = loss.item()
                timer.click()

                if global_step < FLAGS.benchmark_warmup_steps:
                    # Warm-up iterations are excluded from step_time statistics
                    # and skip the evaluation check below.
                    metric_logger.update(
                        loss=loss_value, lr=optimizer.param_groups[0]["lr"])
                    print(f'Warming up, step [{global_step}/{FLAGS.benchmark_warmup_steps}]')
                    continue

                unscale_factor = FLAGS.loss_scale if FLAGS.amp else 1
                metric_logger.update(
                    loss=loss_value / unscale_factor,
                    step_time=timer.measured / FLAGS.print_freq,
                    lr=optimizer.param_groups[0]["lr"] * unscale_factor
                )

                eta_str = datetime.timedelta(seconds=int(metric_logger.step_time.global_avg * (steps_per_epoch - step)))
                metric_logger.print(
                    header=f"Epoch:[{epoch}/{FLAGS.epochs}] [{step}/{steps_per_epoch}]  eta: {eta_str}")

            if (global_step % test_freq == 0 and global_step > 0 and
                    global_step / steps_per_epoch >= FLAGS.test_after):
                loss, auc, test_step_time = evaluate(model, loss_fn, data_loader_test)
                print(f"Epoch {epoch} step {step}. Test loss {loss:.5f}, auc {auc:.6f}")

                if auc > best_auc:
                    best_auc = auc
                    best_epoch = epoch + ((step + 1) / steps_per_epoch)
                    maybe_save_checkpoint(checkpoint_writer, model, FLAGS.save_checkpoint_path)

                if FLAGS.auc_threshold and auc >= FLAGS.auc_threshold:
                    stop_time = time()
                    run_time_s = int(stop_time - start_time)
                    speed = _average_speed(global_step, FLAGS.batch_size, run_time_s)
                    print(f"Hit target accuracy AUC {FLAGS.auc_threshold} at epoch "
                          f"{global_step/steps_per_epoch:.2f} in {run_time_s}s. "
                          f"Average speed {speed:.1f} records/s.")
                    return

        if max_steps_reached:
            # Leave the epoch loop too; otherwise every remaining epoch would
            # rebuild the input pipeline only to break again immediately.
            break

    stop_time = time()
    run_time_s = int(stop_time - start_time)
    speed = _average_speed(global_step, FLAGS.batch_size, run_time_s)

    print(f"Finished training in {run_time_s}s. "
          f"Average speed {speed:.1f} records/s.")

    avg_throughput = FLAGS.batch_size / metric_logger.step_time.avg

    results = {'best_auc' : best_auc,
               'best_epoch' : best_epoch,
               'average_train_throughput' : avg_throughput}

    if test_step_time is not None:
        avg_test_throughput = FLAGS.test_batch_size / test_step_time
        results['average_test_throughput'] = avg_test_throughput

    dllogger.log(data=results, step=tuple())


def evaluate(model, loss_fn, data_loader):
    """Test dlrm model

    The model is switched to eval mode for the pass and back to train mode
    before returning.

    Args:
        model (dlrm):
        loss_fn (torch.nn.Module): Loss function
        data_loader (torch.utils.data.DataLoader):

    Returns:
        tuple: (``metric_logger.loss.global_avg``, AUC from
        ``utils.roc_auc_score``, ``metric_logger.step_time.avg``)
    """
    model.eval()
    print_freq = FLAGS.print_freq
    prefetching_enabled = is_data_prefetching_enabled()

    steps_per_epoch = len(data_loader)
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('loss', utils.SmoothedValue(window_size=1, fmt='{avg:.4f}'))
    metric_logger.add_meter('step_time', utils.SmoothedValue(window_size=1, fmt='{avg:.4f}'))

    if prefetching_enabled:
        data_stream = torch.cuda.Stream()

    with torch.no_grad():
        y_true = []
        y_score = []

        timer = utils.StepTimer()
        timer.click()

        input_pipeline = iter(data_loader)
        if prefetching_enabled:
            input_pipeline = prefetcher(input_pipeline, data_stream)

        for step, (numerical_features, categorical_features, click) in enumerate(input_pipeline):
            if FLAGS.amp:
                numerical_features = numerical_features.half()

            if prefetching_enabled:
                torch.cuda.synchronize()

            output = model(numerical_features, categorical_features).squeeze()
            loss = loss_fn(output, click)

            # Labels and scores are accumulated so AUC is computed once over
            # the whole pass.
            y_true.append(click)
            y_score.append(output)

            loss_value = loss.item()
            timer.click()

            if timer.measured is not None:
                metric_logger.update(loss=loss_value, step_time=timer.measured)
                if step % print_freq == 0 and step > 0:
                    metric_logger.print(header=f"Test: [{step}/{steps_per_epoch}]")

        y_true = torch.cat(y_true)
        y_score = torch.cat(y_score)

        before_auc_timestamp = time()
        auc = utils.roc_auc_score(y_true=y_true, y_score=y_score)
        print(f'AUC computation took: {time() - before_auc_timestamp:.2f} [s]')

    model.train()

    return metric_logger.loss.global_avg, auc, metric_logger.step_time.avg


def inference_benchmark(model, data_loader, num_batches=100):
    """Measure per-batch inference latency.

    Args:
        model: model to benchmark; it is put into eval mode.
        data_loader: iterable yielding (numerical_features,
            categorical_features, click) batches.
        num_batches (int): maximum number of batches to run.

    Returns:
        list: wall-clock seconds per step (host-to-device copy plus forward
        pass), excluding the first ``FLAGS.benchmark_warmup_steps`` steps.
    """
    model.eval()
    base_device = FLAGS.base_device
    # torch.cuda.synchronize() is only meaningful (and only safe to call) when
    # the model actually runs on a CUDA device.
    needs_cuda_sync = torch.device(base_device).type == 'cuda'
    latencies = []

    with torch.no_grad():
        for step, (numerical_features, categorical_features, click) in enumerate(data_loader):
            # ">=" so that exactly num_batches steps are executed.
            if step >= num_batches:
                break

            step_start_time = time()

            numerical_features = numerical_features.to(base_device)
            if FLAGS.amp:
                numerical_features = numerical_features.half()

            categorical_features = categorical_features.to(device=base_device, dtype=torch.int64)

            _ = model(numerical_features, categorical_features).squeeze()
            if needs_cuda_sync:
                # Wait for the queued GPU work so the timing covers it.
                torch.cuda.synchronize()
            step_time = time() - step_start_time

            if step >= FLAGS.benchmark_warmup_steps:
                latencies.append(step_time)

    return latencies


if __name__ == '__main__':
    ...  # entry-point body is elided in this excerpt

parse_html.py

Source:parse_html.py

# -*- coding:utf-8 -*-
import re
import os
import pandas
from collections import OrderedDict
from lxml import etree


class EndReportParse(object):
    """Extract per-step information from an HTML test report and export it
    to an Excel sheet under the ``result`` folder."""

    # Column order of the exported sheet. Kept on the class so save_data()
    # still knows the columns when no step was parsed.
    STEP_FIELDS = (
        'test_step_name',
        'seq_name',
        'test_step_status',
        'test_step_time',
        'module_path',
        'func_name',
        'sent_info',
    )

    def __init__(self, filepath):
        self.filepath = filepath  # HTML file path
        # The name used for Excel saving. Split on both separators so a POSIX
        # path does not leak its directories into the output file name.
        base_name = re.split(r'[\\/]', filepath)[-1]
        self.file_name = base_name.split('.html')[0]
        self.storage_folder = 'result'  # Storage folder name
        self.data = None
        self.step_info = None
        self.final_data = []

    def read_html_file(self):
        """Read html file data to string"""
        with open(self.filepath, 'r') as rf:
            self.data = rf.read()

    @staticmethod
    def _first_stripped(texts):
        """Return the first entry of *texts* stripped, or '' when it is empty."""
        return texts[0].strip() if texts else ''

    def capture_step_info(self, html_node):
        """
        capture each step information and append it as one row to
        ``self.final_data``; fields missing from the node stay ''
        :param html_node: node exposing ``xpath`` (one ``blockquote`` element)
        :return:
        """
        self.step_info = OrderedDict((field, '') for field in self.STEP_FIELDS)

        # The caption span text nodes carry both the step name ([0]) and the
        # step time ([1]); query them once.
        caption_texts = html_node.xpath('table/caption/span/text()')

        # Capture test_step_name
        if caption_texts:
            test_step_name = re.search('Step "(.+?)"', caption_texts[0])
            if test_step_name:
                self.step_info['test_step_name'] = test_step_name.group(1).strip()

        # Capture test_step_status
        self.step_info['test_step_status'] = self._first_stripped(
            html_node.xpath('table/caption/span/strong/text()'))

        # Capture test_step_time: second whitespace-separated token of the
        # second caption text node, when both exist.
        if len(caption_texts) > 1:
            time_tokens = caption_texts[1].split()
            if len(time_tokens) > 1:
                self.step_info['test_step_time'] = time_tokens[1].strip()

        # Capture seq_name (only accepted when it contains a '|')
        seq_name = self._first_stripped(html_node.xpath('table/tbody/tr[4]/td/text()'))
        if '|' in seq_name:
            self.step_info['seq_name'] = seq_name

        # Capture module_path
        self.step_info['module_path'] = self._first_stripped(
            html_node.xpath('table/tbody/tr[5]/td/text()'))

        # Capture func_name
        self.step_info['func_name'] = self._first_stripped(
            html_node.xpath('table/tbody/tr[6]/td/text()'))

        # Capture sent_info
        sent_entries = []
        for row in html_node.xpath('table/tbody/tr[10]/td/details/table/tbody/tr'):
            sent_type = row.xpath('td[2]/text()')
            if not sent_type or 'SENT' not in sent_type[0].upper():
                continue
            conn_cell = row.xpath('td[3]/text()')
            command_cell = row.xpath('td[6]/text()')
            # Empty cells yield '' instead of raising IndexError.
            conn_name = conn_cell[0].split('|')[-1] if conn_cell else ''
            commands = command_cell[0] if command_cell else ''
            sent_entries.append('[{}: {}]'.format(conn_name, commands))
        self.step_info['sent_info'] = ', '.join(sent_entries)

        self.final_data.append(list(self.step_info.values()))

    def save_data(self):
        """Write data to excel"""
        if not os.path.isdir(self.storage_folder):
            os.mkdir(self.storage_folder)
        data = pandas.DataFrame(self.final_data, columns=list(self.STEP_FIELDS))
        data.to_excel('{}/{}.xlsx'.format(self.storage_folder, self.file_name), index=False, sheet_name='Result')

    def parse(self):
        """Parsing HTML files"""
        html = etree.HTML(self.data)
        if html is None:
            # NOTE(review): defensive - presumably returned when the document
            # contains no parsable element; confirm against lxml behaviour.
            return
        all_step = html.xpath('//blockquote')  # Capture all step node
        for each in all_step:
            self.capture_step_info(each)

    def main(self):
        """Read the report, parse every step and write the Excel file."""
        self.read_html_file()
        self.parse()
        self.save_data()


def main():
    """Parse every ``*.html`` file found in the current working directory."""
    # Reads all HTML files in the current path; match the suffix rather than
    # any occurrence of '.html' inside the name.
    html_file = [file_name for file_name in os.listdir(os.getcwd())
                 if file_name.endswith('.html')]
    for html in html_file:
        print('Start parsing the ({}) file'.format(html))
        report_parse = EndReportParse(filepath=html)
        report_parse.main()


if __name__ == '__main__':
    ...  # entry-point body is elided in this excerpt

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.