Best Python code snippet using gherkin-python
preprocess_BERT.py
Source: preprocess_BERT.py

from __future__ import division
import random
import sys
import io
import os
import logging
import re
import pandas as pd
import ujson as json
import os.path as op
from tqdm import tqdm
from collections import Counter, OrderedDict
import argparse

program = os.path.basename(sys.argv[0])
L = logging.getLogger(program)
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)
L.info("Running %s" % ' '.join(sys.argv))

# Entity-linking markup looks like "#phrase;row,col#"; "-1" marks unlinked mentions.
entity_linking_pattern = re.compile('#.*?;-*[0-9]+,(-*[0-9]+)#')
fact_pattern = re.compile('#(.*?);-*[0-9]+,-*[0-9]+#')
unk_pattern = re.compile('#([^#]+);-1,-1#')
TSV_DELIM = "\t"
TBL_DELIM = " ; "


def join_unicode(delim, entries):
    # entries = [_.decode('utf8') for _ in entries]
    return delim.join(entries)


def parse_fact(fact):
    """Strip the entity-linking markup from a fact, keeping the surface phrases."""
    fact = re.sub(unk_pattern, '[UNK]', fact)
    chunks = re.split(fact_pattern, fact)
    output = ' '.join([x.strip() for x in chunks if len(x.strip()) > 0])
    return output


def process_file(data_dir, shuffle=False):
    all_csv_dir = op.join(data_dir, "data/all_csv")
    all_data = op.join(data_dir, "tokenized_data/full_cleaned.json")
    examples = []
    with io.open(all_data, 'r', encoding='utf8') as fin:
        dataset = json.load(fin)
        for idx, (fname, sample) in tqdm(enumerate(dataset.items())):
            print(fname)
            try:
                if len(sample) == 0:
                    continue
                print(fname, "success")
                table = pd.read_csv(op.join(all_csv_dir, fname), delimiter='[#]')
                # print(table)
                # facts: list of strings
                facts = sample[0]
                # labels: list of ints
                labels = sample[1]
                print(len(facts), len(labels))
                assert all([x in [0, 1, 2] for x in labels])
                assert len(facts) == len(labels)
                # types: list of table column strings
                types = [str(x) for x in table.columns.values.tolist()]
                # columns: {type: list of cell phrases in this column}
                columns = OrderedDict()
                for t in types:
                    # np array of cells in the one-column table (dataframe) --> list
                    one_column = [str(x) for x in table[t].to_numpy().tolist()]
                    columns[t] = one_column
                # pack into one example
                example = {
                    "csv": fname,
                    "columns": columns,
                    "facts": facts,
                    "labels": labels
                }
                examples.append(example)
            except Exception:
                print("{} is misformatted".format(fname))
        if shuffle:
            random.shuffle(examples)
        print("{} samples in total".format(len(examples)))
    return examples


def convert_to_tsv(out_file, examples, dataset_type, meta, scan):
    L.info("Processing {} examples...".format(dataset_type))
    total = 0
    unk = 0
    len_total = 0
    empty_table = 0
    with io.open(out_file, 'w', encoding='utf-8') as fout:
        for example in tqdm(examples):
            assert len(example['facts']) == len(example['labels'])
            for fact, label in zip(example['facts'], example['labels']):
                # use entity linking info to retain relevant columns
                useful_column_nums = [int(x) for x in re.findall(entity_linking_pattern, fact) if x != '-1']
                useful_column_nums = dict.fromkeys(useful_column_nums)
                remaining_table = OrderedDict()
                for idx, (column_type, column_cells) in enumerate(example['columns'].items()):
                    if idx in useful_column_nums:
                        column_type = '_'.join(column_type.split())
                        remaining_table[column_type] = column_cells
                fact_clean = parse_fact(fact)
                if len(remaining_table) > 0:
                    table_cells, table_feats = [], []
                    len_total += 1
                    if scan == 'vertical':
                        # vertical scan: serialize the table column by column
                        for column_type, column_cells in remaining_table.items():
                            column_type = ' '.join(column_type.split('_'))
                            table_cells.extend([column_type, 'are :'])
                            this_column = []
                            for idx, c in enumerate(column_cells):
                                this_column.append("row {} is {}".format(idx + 1, c))
                            this_column = join_unicode(TBL_DELIM, this_column)
                            table_cells.append(this_column)
                            table_cells.append('.')
                            table_feats.append(column_type)
                    else:
                        # horizontal scan: clumsy, but preserves column order
                        table_column_names, table_column_cells = [], []
                        for column_type, column_cells in remaining_table.items():
                            column_type = ' '.join(column_type.split('_'))
                            table_feats.append(column_type)
                            table_column_names.append(column_type)
                            table_column_cells.append(column_cells)
                        for idx, row in enumerate(zip(*table_column_cells)):
                            table_cells.append('row {} is :'.format(idx + 1))
                            this_row = []
                            for col, tk in zip(table_column_names, row):
                                this_row.append('{} is {}'.format(col, tk))
                            this_row = join_unicode(TBL_DELIM, this_row)
                            table_cells.append(this_row)
                            table_cells.append('.')
                    table_str = ' '.join(table_cells)
                    out_items = [example['csv'],
                                 str(len(table_feats)),
                                 ' '.join([str(x) for x in table_feats]),
                                 table_str,
                                 fact_clean,
                                 str(label)]
                    out_items = TSV_DELIM.join(out_items)
                    total += 1
                    fout.write(out_items + "\n")
                else:
                    # no linked columns: emit a [UNK] table, but only for eval splits
                    if dataset_type != 'train':
                        table_feats = ['[UNK]']
                        table_cells = ['[UNK]']
                        table_str = ' '.join(table_cells)
                        out_items = [example['csv'],
                                     str(len(table_feats)),
                                     ' '.join([str(x) for x in table_feats]),
                                     table_str,
                                     fact_clean,
                                     str(label)]
                        out_items = TSV_DELIM.join(out_items)
                        fout.write(out_items + "\n")
                        total += 1
                    empty_table += 1
    print("Built {} instances of features in total, {}/{}={}% unseen column types, {} empty tables"
          .format(total, unk, len_total, "{0:.2f}".format(unk * 100 / len_total), empty_table))
    meta["{}_total".format(dataset_type)] = total
    return meta


def split_dataset(data_dir, all_examples):
    total_size = len(all_examples)
    L.info("split {} tables into train dev test ...".format(total_size))
    data_dir = op.join(data_dir, "data/")
    csv_id_lkt = {}
    for x in ['train', 'val', 'test', 'small_test', 'simple_test', 'complex_test']:
        id_file = op.join(data_dir, "{}_id.json".format(x))
        with io.open(id_file, 'r', encoding='utf-8') as fin:
            csv_id_lkt[x] = dict.fromkeys(json.load(fin), True)
    trainset, validset, testset, small_test, simple_test, complex_test = [], [], [], [], [], []
    for sample in all_examples:
        if sample['csv'] in csv_id_lkt['small_test']:
            small_test.append(sample)
        if sample['csv'] in csv_id_lkt['simple_test']:
            simple_test.append(sample)
        if sample['csv'] in csv_id_lkt['complex_test']:
            complex_test.append(sample)
        if sample['csv'] in csv_id_lkt['test']:
            testset.append(sample)
        if sample['csv'] in csv_id_lkt['train']:
            trainset.append(sample)
        elif sample['csv'] in csv_id_lkt['val']:
            validset.append(sample)
        else:
            print('{} is NOT used'.format(sample['csv']))
    return trainset, validset, testset, small_test, simple_test, complex_test


def save(filename, obj, message=None, beautify=False):
    assert message is not None
    print("Saving {} ...".format(message))
    with io.open(filename, "a") as fh:
        if beautify:
            json.dump(obj, fh, sort_keys=True, indent=4)
        else:
            json.dump(obj, fh)


def mkdir_p(path1, path2=None):
    if path2 is not None:
        path1 = os.path.join(path1, path2)
    if not os.path.exists(path1):
        os.mkdir(path1)
    return path1


def count_types(dataset):
    type_cnt = []
    for example in dataset:
        for name in example['columns'].keys():
            type_cnt.append('_'.join(name.split()))
    return type_cnt


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir",
                        type=str,
                        default='../',
                        help="The path of the TabFact folder")
    parser.add_argument("--output_dir",
                        type=str,
                        default='../processed_datasets',
                        help="The path to save output tsv files")
    parser.add_argument("--scan",
                        default="horizontal",
                        choices=["vertical", "horizontal"],
                        type=str,
                        help="The direction of sequentializing table cells.")
    args = parser.parse_args()
    root_dir = mkdir_p(args.output_dir)
    data_save_dir = mkdir_p(root_dir, "tsv_data_{}".format(args.scan))
    train_tsv = os.path.join(data_save_dir, "train.tsv")
    dev_tsv = os.path.join(data_save_dir, "dev.tsv")
    test_tsv = os.path.join(data_save_dir, "test.tsv")
    small_test_tsv = os.path.join(data_save_dir, "small_test.tsv")
    simple_test_tsv = os.path.join(data_save_dir, "simple_test.tsv")
    complex_test_tsv = os.path.join(data_save_dir, "complex_test.tsv")
    meta_file = os.path.join(data_save_dir, "meta.json")
    type2idx_file = os.path.join(data_save_dir, "type2idx.json")
    idx2type_file = os.path.join(data_save_dir, "idx2type.json")
    L.info("process file ...")
    all_examples = process_file(args.data_dir)
    L.info("splitting datasets ...")
    trainset, devset, testset, small_test, simple_test, complex_test = split_dataset(args.data_dir, all_examples)
    L.info("build tsv datasets ...")
    meta = {}
    meta = convert_to_tsv(train_tsv, trainset, "train", meta, args.scan)
    meta = convert_to_tsv(dev_tsv, devset, "dev", meta, args.scan)
    meta = convert_to_tsv(test_tsv, testset, "test", meta, args.scan)
    meta = convert_to_tsv(small_test_tsv, small_test, "small_test", meta, args.scan)
    meta = convert_to_tsv(simple_test_tsv, simple_test, "simple_test", meta, args.scan)
    meta = convert_to_tsv(complex_test_tsv, complex_test, "complex_test", meta, args.scan)
    ...
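
For quick reference, here is a minimal, self-contained sketch of what the entity-linking regexes above do. The annotated fact string is invented for illustration; parse_fact() strips the "#phrase;row,col#" markup, and the useful_column_nums step in convert_to_tsv() collects the linked column indices.

import re

# Same patterns as in preprocess_BERT.py.
entity_linking_pattern = re.compile('#.*?;-*[0-9]+,(-*[0-9]+)#')
fact_pattern = re.compile('#(.*?);-*[0-9]+,-*[0-9]+#')
unk_pattern = re.compile('#([^#]+);-1,-1#')

# Hypothetical annotated fact: "greece" is linked to row 0, column 1.
fact = '#greece;0,1# won more than 2 gold medals'

# Column indices referenced by the fact (-1 marks unlinked mentions).
print([int(x) for x in re.findall(entity_linking_pattern, fact) if x != '-1'])  # [1]

# What parse_fact() produces: markup dropped, surface phrase kept.
clean = re.sub(unk_pattern, '[UNK]', fact)
chunks = re.split(fact_pattern, clean)
print(' '.join(x.strip() for x in chunks if x.strip()))  # greece won more than 2 gold medals
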
generate_graphs.py

Source: generate_graphs.py
import matplotlib.pyplot as plt
import numpy as np
import os
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline

colors = {
    'pyBW exact': 'orange',
    'pyBW app.': 'red',
    'pyBG exact': 'green',
    'pyBG app. bin=int_size/5': 'cyan',
    'pyBG app. bin=int_size/10': 'magenta',
    'pyBG app. bin=int_size/20': 'blue',
    'pyBG app. bin=100': 'cyan',
    'pyBG app. bin=50': 'magenta',
    'pyBG app. bin=25': 'blue'
}
METHOD_LIST = [
    'pyBW exact',
    'pyBW app.',
    'pyBG exact',
    'pyBG app. bin=int_size/5',
    'pyBG app. bin=int_size/10',
    'pyBG app. bin=int_size/20'
]
TABLE_HEADERS = [
    'Interval Size (bPS)',
    'Error Rate (%)',
    'Mean Squared Error',
    'Absolute Error',
    '# Actual is 0'
]
RUNTIME_TABLE_HEADERS = [
    'Dataset',
    'pyBW exact',
    'pyBW app.',
    'pyBG exact',
    'pyBG app. bin=100',
    'pyBG app. bin=50',
    'pyBG app. bin=25'
]
TITLE_FONT_SIZE = 16
AXIS_FONT_SIZE = 12
LEGEND_FONT_SIZE = 10
GRAPH_ROOT_LOCATION = 'graphs'
NUM_ERROR_TYPES = 4
sample_runtimes = {}


# Interval size: 500
def create_runtime_num_test(infile, data_name):
    line = infile.readline()
    num_tests = [int(x) for x in line.split()]
    run_times = {}
    while True:
        name = infile.readline().strip()
        if name == "":
            break
        results = infile.readline().split()
        run_times[name] = [float(x) for x in results]
    sample_runtimes[data_name] = {}
    for test in run_times:
        sample_runtimes[data_name][test] = run_times[test][-1]
    i = 0
    for name in run_times:
        plt.plot([np.log10(x) for x in num_tests], [np.log10(x) for x in run_times[name]],
                 color=colors[name], label=name)
        i += 1
    plt.title(f"Run Time for {data_name}", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("log10(# of tests)", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("log10(runtime (seconds))", fontsize=AXIS_FONT_SIZE)
    plt.legend(loc='best', fontsize=LEGEND_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/run_time.png', dpi=300)
    plt.close()


# Number of tests: 10,000
def create_interval_error(in_file, data_name):
    line = in_file.readline()
    intervals = [int(x) for x in line.split()]
    errors = {}
    table_cells = {}
    while True:
        name = in_file.readline().strip()
        if name == "":
            break
        table_cells[name] = [[x] for x in intervals]
        error_list = []
        for i in range(NUM_ERROR_TYPES):
            error_list.append(in_file.readline().split())
        errors[name] = {}
        for line in error_list:
            error_name = line[0]
            errors[name][error_name] = [float(line[x]) for x in range(1, len(line), 1)]
            for x in range(len(intervals)):
                error = errors[name][error_name][x]
                if error_name == 'not_included':
                    error = int(error)
                if error_name == 'percent_error':
                    error *= 100
                error = round(error, 5)
                table_cells[name][x].append(error)
    # Don't include intervals of over 10k in the graph
    while intervals[-1] >= 10000:
        intervals.pop()
        for name in errors:
            errors[name]['percent_error'].pop()
    i = 0
    for name in errors:
        plt.plot(intervals, [x * 100 for x in errors[name]['percent_error']], color=colors[name], label=name)
        i += 1
    plt.title(f"Percentage Error Rate vs. Interval Size for {data_name}", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("Interval Size (basepairs)", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("Percentage Error Rate (%)", fontsize=AXIS_FONT_SIZE)
    plt.legend(loc='best', fontsize=LEGEND_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/interval_error.png', dpi=300)
    plt.close()
    for name in table_cells:
        plt.figure()
        plt.title(f"{data_name} --- {name}", fontsize=AXIS_FONT_SIZE)
        table = plt.table(
            cellText=table_cells[name],
            colWidths=[0.027, 0.023, 0.03, 0.022, 0.02],
            colLabels=TABLE_HEADERS,
            loc='center'
        )
        table.auto_set_font_size(False)
        table.set_fontsize(LEGEND_FONT_SIZE)
        table.scale(11, 2)
        plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False)
        for pos in ['right', 'top', 'bottom', 'left']:
            plt.gca().spines[pos].set_visible(False)
        save_name = name.replace(" ", "_").replace('/', '')
        plt.savefig(f'graphs/{data_name}/{save_name}_table.png',
                    bbox_inches='tight', pad_inches=0.05, dpi=300)
        plt.close()


# Number of tests: 10,000
def create_interval_runtime(in_file, data_name):
    line = in_file.readline()
    intervals = [int(x) for x in line.split()]
    run_times = {}
    while True:
        name = in_file.readline().strip()
        if name == '':
            break
        results = in_file.readline().split()
        run_times[name] = [float(x) for x in results]
    table_cells = [[x] for x in intervals]
    for i, interval in enumerate(intervals):
        for name in METHOD_LIST:
            table_cells[i].append(round(run_times[name][i], 5))
    plt.figure()
    plt.title(f"Runtime (seconds) vs. Interval Size for {data_name}\n",
              fontsize=TITLE_FONT_SIZE, y=1.2)
    table = plt.table(
        cellText=table_cells,
        colWidths=[0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2],
        colLabels=['Interval Size'] + METHOD_LIST,
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(LEGEND_FONT_SIZE)
    table.scale(2, 3)
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False)
    for pos in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[pos].set_visible(False)
    plt.savefig(f'graphs/{data_name}/interval_runtime_table.png',
                bbox_inches='tight', pad_inches=0.05, dpi=300)
    plt.close()
    # Don't include intervals of over 10k in the graph
    while intervals[-1] >= 10000:
        intervals.pop()
        for name in run_times:
            run_times[name].pop()
    i = 0
    for name in run_times:
        plt.plot(intervals, [np.log10(x) for x in run_times[name]],
                 color=colors[name], label=name)
        i += 1
    plt.title(f"Run Time vs. Interval Size for {data_name}", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("Interval Size (basepairs)", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("log10(runtime (seconds))", fontsize=AXIS_FONT_SIZE)
    plt.legend(loc='best', fontsize=LEGEND_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/interval_run_time.png', dpi=300)
    plt.close()


def create_values_indexed(in_file, data_name):
    bin_sizes = in_file.readline().split()
    bin_sizes = [int(x) for x in bin_sizes]
    values_indexed = in_file.readline().split()
    values_indexed = [int(x) for x in values_indexed]
    plt.plot(bin_sizes, values_indexed)
    plt.title("Values Indexed vs. Bin Size for Exact Mean Calculation", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("Bin Size", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("Values Indexed", fontsize=AXIS_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/values_indexed.png')
    plt.close()


def create_million_runtime_table():
    name_order = [
        'ENCFF050CCI',
        'ENCFF321FZQ',
        'ENCFF376VCU',
        'ENCFF384CMP',
        'ENCFF631HEX',
        'ENCFF643WMY',
        'ENCFF770CQD',
        'ENCFF847JMY',
        'ENCFF726XVA',
        'ENCFF877IHY',
        'ENCFF000LAB',
        'ENCFF000KYT'
    ]
    table_cells = [
        [name] + [round(sample_runtimes[name][stat], 3) for stat in RUNTIME_TABLE_HEADERS[1:]]
        for name in name_order
    ]
    average = [
        round(np.mean([sample_runtimes[sample][stat] for sample in sample_runtimes]), 3)
        for stat in RUNTIME_TABLE_HEADERS[1:]
    ]
    table_cells.append(['Average'] + average)
    plt.figure()
    # plt.title("Runtime for 1 Million Test Intervals", fontsize=AXIS_FONT_SIZE)
    table = plt.table(
        cellText=table_cells,
        colWidths=[1/7 for _ in range(7)],
        colLabels=RUNTIME_TABLE_HEADERS,
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(LEGEND_FONT_SIZE)
    table.scale(2, 2)
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.tick_params(axis='y', which='both', right=False, left=False,
                    labelleft=False)
    for pos in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[pos].set_visible(False)
    plt.savefig('graphs/runtime_table.png',
                bbox_inches='tight', pad_inches=0.05)
    plt.close()


def main():
    # create_values_indexed(open('graphs/ENCFF376VCU/values_indexed.txt'), 'ENCFF376VCU')
    for subdir, dirs, files in os.walk(GRAPH_ROOT_LOCATION):
        data_name = subdir[7:]
        print(data_name)
        for file_name in files:
            file_path = subdir + '/' + file_name
            with open(file_path) as in_file:
                if file_name == 'run_time_results.txt':
                    pass
                    # create_runtime_num_test(in_file, data_name)
                elif file_name == 'interval_error_results.txt':
                    pass
                    # create_interval_error(in_file, data_name)
                elif file_name == 'interval_runtime_results.txt':
                    create_interval_runtime(in_file, data_name)
                elif file_name[-4:] == '.png' or file_name[-4:] == '.swp':
                    continue
                else:
                    print(f"Unknown file: {file_name}")
    # create_million_runtime_table()


if __name__ == '__main__':
    main()
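
The readers above all assume the same plain whitespace-delimited layout. Inferring from create_interval_runtime() (the numbers below are invented), the first line lists the interval sizes, each method then contributes a name line followed by a timings line, and a blank line terminates the block:

import io

# A stand-in for an interval_runtime_results.txt file (values invented).
demo = io.StringIO(
    "100 500 1000\n"
    "pyBW exact\n"
    "0.51 0.92 1.40\n"
    "pyBG exact\n"
    "0.22 0.41 0.73\n"
    "\n"
)

# The same parsing loop the functions above use.
intervals = [int(x) for x in demo.readline().split()]
run_times = {}
while True:
    name = demo.readline().strip()
    if name == '':
        break
    run_times[name] = [float(x) for x in demo.readline().split()]

print(intervals)   # [100, 500, 1000]
print(run_times)   # {'pyBW exact': [0.51, 0.92, 1.4], 'pyBG exact': [0.22, 0.41, 0.73]}
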
SimpleHTML.py

Source: SimpleHTML.py
import json
import base64
from PIL import Image

HTML_START = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html lang="en">
<head>
  <title>%s</title>
</head>
<body>\n"""
HTML_END = """</body>
</html>"""
COLOR_DICT = {
    "LightRed": "#FFCCCC",
    "Red": "#FE0000",
    "LightGreen": "#CCFFCC",
    "Green": "#007800",
}


def create_text(text, heading=None, color=None, bold=None):
    # color and bold are currently unused
    if heading:
        html_str = "<h%d>%s</h%d>\n" % (heading, text, heading)
    else:
        html_str = text
    return html_str


def create_image(image_path, width=None, height=None):
    html_str = '<img src="%s" title="%s"' % (image_path, image_path)
    if width:
        html_str += " width=%d" % width
    if height:
        html_str += " height=%d" % height
    html_str += "/>\n"
    return html_str


def create_html_start(title=""):
    return HTML_START % title


def create_html_end():
    return HTML_END


def create_table(table_header=None, table_cells=None, border_size=1):
    string_list = []
    string_list.append("<table border='%d'>" % border_size)
    if table_header:
        string_list.append("<tr>")
        header_str = ""
        for header in table_header:
            if header.startswith("bgcolor"):
                header_str += "<th %s</th>" % header
            else:
                header_str += "<th>%s</th>" % header
        header_str += "\n"
        string_list.append(header_str)
    if table_cells:
        for table_row in table_cells:
            if str(table_row[0]).startswith("bgcolor"):
                row_str = "<tr %s>" % str(table_row[0])
                table_row.pop(0)
            else:
                row_str = "<tr>"
            for cell in table_row:
                cell_str = str(cell)
                if cell_str.startswith("<td bgcolor"):
                    row_str += cell_str
                elif cell_str.startswith("bgcolor"):
                    row_str += "<td %s" % cell_str
                else:
                    row_str += "<td>%s</td>" % cell_str
            row_str += "</tr>\n"
            string_list.append(row_str)
    if table_header:
        string_list.append("</tr>")
    string_list.append("</table>")
    return string_list


def create_ref(ref_name, ref_text=None, hidden=True):
    if ref_text:
        return "<a href=#%s>%s</a>" % (ref_name, ref_text)
    else:
        return "<a href=#%s></a>" % ref_name


def create_toc(ref_list, title):
    toc_str = '<nav role="navigation" class="table-of-contents">'
    if title:
        toc_str += "<h2>%s</h2>" % title
    toc_str += "<ul>"
    for ref in ref_list:
        toc_str += '<li><a href="#%s">%s</a></li>' % (ref[0], ref[1])
    toc_str += "</ul></nav>"
    return toc_str


def create_json_images(image_list):
    json_item = {"type": "images", "items": []}
    for image in image_list:
        item = {}
        item["type"] = "image"
        item["suffix"] = image["filename"].split(".")[-1]
        item["title"] = image["title"]
        im = Image.open(image["filename"])
        item["xsize"] = im.size[0] / 2
        item["ysize"] = im.size[1] / 2
        item["value"] = base64.b64encode(open(image["filename"], "rb").read())
        """
        if item.get("thumbnail_image_filename") is None:
            if thumbnailHeight is not None and thumbnailWidth is not None:
                item["thumbnailSuffix"] = pathToImage.split(".")[-1]
                item["thumbnailXsize"] = thumbnailHeight
                item["thumbnailYsize"] = thumbnailWidth
                item["thumbnailValue"] = base64.b64encode(open(pathToImage).read())
        else:
            item["thumbnailSuffix"] = pathToThumbnailImage.split(".")[-1]
            thumbnailIm = PIL.Image.open(pathToThumbnailImage)
            item["thumbnailXsize"] = thumbnailIm.size[0]
            item["thumbnailYsize"] = thumbnailIm.size[1]
            item["thumbnailValue"] = base64.b64encode(open(pathToThumbnailImage).read())
        """
        json_item["items"].append(item)
    return json_item


def generate_parallel_processing_report(mesh_scan_results, params_dict):
    json_dict = {"items": []}
    html_file = open(params_dict["html_file_path"], "w")
    html_file.write('<div align="CENTER">\n')
    if params_dict["lines_num"] > 1:
        json_dict["items"].append({"type": "title", "value": "Mesh scan results"})
        html_file.write(HTML_START % "Mesh scan results")
    else:
        html_file.write(HTML_START % "Line scan results")
        json_dict["items"].append({"type": "title", "value": "Line scan results"})
    html_file.write(create_image("parallel_processing_plot.png"))
    html_file.write("</br>")
    html_file.write(create_text("Scan parameters", heading=1))
    osc_range_per_line = params_dict["osc_range"] * (params_dict["images_per_line"] - 1)
    table_cells = [
        ("Number of lines", str(params_dict["lines_num"])),
        ("Frames per line", str(params_dict["images_per_line"])),
    ]
    if params_dict["lines_num"] > 1:
        table_cells.extend(
            (
                (
                    "Grid size",
                    "%d x %d microns"
                    % (
                        (params_dict["steps_x"] * params_dict["xOffset"] * 1000),
                        (params_dict["steps_y"] * params_dict["yOffset"] * 1000),
                    ),
                ),
                (
                    "Scan area",
                    "%d x %d microns"
                    % ((params_dict["dx_mm"] * 1000), (params_dict["dy_mm"] * 1000)),
                ),
                (
                    "Horizontal distance between frames",
                    "%d microns" % (params_dict["xOffset"] * 1000),
                ),
                (
                    "Vertical distance between frames",
                    "%d microns" % (params_dict["yOffset"] * 1000),
                ),
                ("Oscillation middle", "%.1f" % params_dict["osc_midle"]),
                ("Oscillation range per frame", "%.2f" % params_dict["osc_range"]),
                (
                    "Oscillation range per line",
                    "%.2f (from %.2f to %.2f)"
                    % (
                        osc_range_per_line,
                        (params_dict["osc_midle"] - osc_range_per_line / 2),
                        (params_dict["osc_midle"] + osc_range_per_line / 2),
                    ),
                ),
            )
        )
    table_rec = create_table(table_cells=table_cells, border_size=0)
    for row in table_rec:
        html_file.write(row)
    html_file.write("</br>")
    positions = mesh_scan_results.get("best_positions", [])
    if len(positions) > 0:
        html_file.write(create_text("Best position", heading=1))
        html_file.write("</br>")
        html_file.write('<font size="2">')
        table_cells = [
            [
                "%d" % positions[0]["index"],
                "<b>%.2f</b>" % positions[0]["score"],
                "<b>%d</b>" % positions[0]["spots_num"],
                "%.1f" % positions[0]["spots_resolution"],
                positions[0]["filename"],
                "%d" % (positions[0]["col"] + 0.5),
                "%d" % (positions[0]["row"] + 0.5),
            ]
        ]
        table_rec = create_table(
            [
                "Index",
                "<b>Score</b>",
                "<b>Number of spots</b>",
                "Resolution",
                "File name",
                "Column",
                "Row",
            ],
            table_cells,
        )
        for row in table_rec:
            html_file.write(row)
        html_file.write("</br>")
        if len(positions) > 1:
            html_file.write(create_text("All positions", heading=1))
            html_file.write("</br>")
            table_cells = []
            for position in positions[1:]:
                table_cells.append(
                    (
                        position["index"],
                        "<b>%.2f</b>" % position["score"],
                        "<b>%d</b>" % position["spots_num"],
                        "%.1f" % position["spots_resolution"],
                        position["filename"],
                        "%d" % (position["col"] + 0.5),
                        "%d" % (position["row"] + 0.5),
                    )
                )
            table_rec = create_table(
                [
                    "Index",
                    "<b>Score</b>",
                    "<b>Number of spots</b>",
                    "Resolution",
                    "File name",
                    "Column",
                    "Row",
                ],
                table_cells,
            )
            for row in table_rec:
                html_file.write(row)
            html_file.write("</br>")
        html_file.write("</font>")
    html_file.write("</div>\n")
    html_file.write(HTML_END)
    html_file.close()
    image = {"title": "plot", "filename": params_dict["cartography_path"]}
    json_dict["items"].append(create_json_images([image]))
    ...
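
A short usage sketch for the helpers above (assuming they are imported from SimpleHTML; the report content is invented): build a minimal page from a heading and a two-column table.

from SimpleHTML import create_html_start, create_text, create_table, create_html_end

parts = [create_html_start("Demo report")]        # <!DOCTYPE ...><html><body>
parts.append(create_text("Results", heading=1))   # <h1>Results</h1>
parts.extend(create_table(table_header=["Sample", "Score"],
                          table_cells=[["A", 0.91], ["B", 0.87]]))
parts.append(create_html_end())                   # </body></html>

with open("demo_report.html", "w") as fh:
    fh.write("".join(parts))
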
grabber_handler.py

Source: grabber_handler.py
# -*- coding: utf-8 -*-
""" Grabs html, parses, and saves json to disk. """
from __future__ import unicode_literals
import datetime, json, pprint, sys
import requests
from bs4 import BeautifulSoup
from clusters_api.config import settings
# from clusters_api.utils import logger_setup


class Grabber(object):
    """ TODO: once server acls are set up, re-enable logging. """
    def __init__( self ):
        """ Sets up basics. """
        self.parser = None
        self.parser = Parser()
    # def __init__( self, log ):
    #     """ Sets up basics. """
    #     self.log = log
    #     self.parser = None
    #     self.parser = Parser( self.log )
    def update_data( self ):
        """ Accesses source html, parses it, and saves json to disk. """
        r = requests.get( settings.SOURCE_URL )
        html = r.content.decode( 'utf-8' )
        clusters_dict = self.parser.parse_cluster_html( html )
        save_dict = {
            'datetime_updated': unicode( datetime.datetime.now() ),
            'counts': clusters_dict }
        jstring = json.dumps( save_dict, sort_keys=True, indent=2 )
        with open( settings.JSON_FILE_PATH, 'w' ) as f:
            f.write( jstring )
        return


class Parser(object):
    def __init__( self ):
        """ Sets up basics. """
        self.cluster_name_mapper = {  # source-html-name: api-name
            'Rock 1st Floor': 'rock-level-1',
            'Rock 2nd Floor': 'rock-level-2-main',
            'Rock Grad': 'rock-level-2-grad',
            'Friedman': 'scili-friedman',
            'SciLi Mezz': 'scili-mezzanine' }
    # def __init__( self, log ):
    #    """ Sets up basics. """
    #    self.log = log
    #    self.cluster_name_mapper = {  # source-html-name: api-name
    #         'Rock 1st Floor': 'rock-level-1',
    #         'Rock 2nd Floor': 'rock-level-2-main',
    #         'Rock Grad': 'rock-level-2-grad',
    #         'Friedman': 'scili-friedman',
    #         'SciLi Mezz': 'scili-mezzanine' }
    def parse_cluster_html( self, html ):
        """ Takes source html.
            Parses out cluster data.
            Returns dict.
            Note: this uses the mobile site, which doesn't directly contain all the info needed. """
        table_rows = self._grab_cluster_tablerows( html )
        data_dict = {}
        for row in table_rows:
            title = self._extract_title( row )
            if title in self.cluster_name_mapper.keys():
                count_dict = self._extract_counts( row )
                data_dict[ self.cluster_name_mapper[title] ] = count_dict  # takes, e.g., title 'Rock 1st Floor' and stores the key as 'rock-level-1'
        api_data_dict = self._tweak_counts( data_dict )
        return api_data_dict
    def _grab_cluster_tablerows( self, html ):
        """ Helper. Grabs cluster table-row objects from html.
            Returns list of BeautifulSoup dom objects. """
        soup = BeautifulSoup( html )
        table_rows = soup.findAll( 'tr' )
        relevant_tablerows = []
        for row in table_rows:
            table_cells = row.findAll( 'td' )
            if len( table_cells ) == 9:
                relevant_tablerows.append( row )
        return relevant_tablerows
    def _extract_title( self, row ):
        """ Helper. Grabs title from table-row object.
            Returns unicode-string or None. """
        title_cell = row.findAll( 'td' )[0]
        a_link = title_cell.findAll( 'a' )
        title = None
        if len( a_link ) > 0:  # goal: '''[<a href="javascript:loadPieChart(11)">Rock 1st Floor</a>]'''
            title = unicode( a_link[0].string )
        return title
    def _extract_counts( self, row ):
        """ Helper. Grabs count info from table-row object.
            Returns dict; counts are integers. """
        table_cells = row.findAll( 'td' )
        rawdata_count_names = [ 'In Use', 'Available Stations', 'Unavailable Stations', 'Offline Stations', 'Total Stations' ]  # don't re-order; this is the order in the rawdata
        count_dict = {}; i = 0
        for cell in table_cells:
            try:
                count = int( cell.string )
                count_dict[ rawdata_count_names[i] ] = count
                i += 1
            except Exception:
                pass
        return count_dict
    def _tweak_counts( self, data_dict ):
        """ Helper. Updates count_dict labels to api-compatible ones; adds useful 'calculated_available' data.
            Returns dict. """
        updated_data_dict = {}
        for key, value in data_dict.items():
            cluster_name = key; count_dict = value
            updated_count_dict = {
                'available': count_dict['Available Stations'],
                'calculated_available': count_dict['Available Stations'] + count_dict['Offline Stations'],
                'in_use': count_dict['In Use'],
                'offline': count_dict['Offline Stations'],
                'total': count_dict['Total Stations'] }
            updated_data_dict[cluster_name] = updated_count_dict
        return updated_data_dict


if __name__ == '__main__':
    """ Assumes env is activated.
        Called by cron script.
        TODO: once server acls are set up, re-enable logging. """
    try:
        grabber = Grabber()
        grabber.update_data()
    except Exception as e:
        message = '- in grabber_handler.__main__; exception updating data, %s' % unicode(repr(e))
        print message
# if __name__ == '__main__':
#     """ Assumes env is activated.
#         Called by cron script. """
#     try:
#         log = logger_setup.setup_logger()
#     except Exception as e:
#         print '- in grabber_handler.__main__; exception setting up logger, %s' % unicode(repr(e))
#         sys.exit()
#     try:
#         grabber = Grabber( log )
#         grabber.update_data()
#     except Exception as e:
#         message = '- in grabber_handler.__main__; exception updating data, %s' % unicode(repr(e))
#         print message
...
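
A quick sketch of the relabelling done by Parser._tweak_counts(), with invented counts (note the module above is Python 2, so run this with a Python 2 interpreter and with clusters_api on the path):

from grabber_handler import Parser

raw = {'rock-level-1': {'In Use': 30, 'Available Stations': 10,
                        'Unavailable Stations': 2, 'Offline Stations': 3,
                        'Total Stations': 45}}
print Parser()._tweak_counts(raw)
# {'rock-level-1': {'available': 10, 'calculated_available': 13,
#                   'in_use': 30, 'offline': 3, 'total': 45}}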