Best Python code snippet using robotframework
generator.py
Source:generator.py  
"""File:        generator.py
   Author:      Nathan Robinson
   Contact:     nathan.m.robinson@gmail.com
   Date:        2013-11-13
   Description: Generate cave diver PDDL problems.
   License:     Copyright (c) Year 2013, Nathan Robinson <nathan.m.robinson@gmail.com>
                                         Christian Muise <christian.muise@gmail.com>
                                         Charles Gretton <charles.gretto@gmail.com>
                Permission to use, copy, modify, and/or distribute this software for any
                purpose with or without fee is hereby granted, provided that the above
                copyright notice and this permission notice appear in all copies.
                THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
                SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
                IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

            To do:
            1. partial order reduction option
            2. Make it easier to specify what tunnels to make
            3. document and send in
"""
import sys
import random
import itertools

# Small tolerance constant (not referenced by the visible code).
eps = 0.01


def make_caves(branch_depths):
    """ Generate a random tree with the branches of the given lengths.
        Return the nodes, the node depths, and the leaf nodes.
        ([int]) -> [(int, int)], [int], [int]
    """
    # First branch forms the trunk: a simple path 0 -> 1 -> ... -> depth.
    edges = [(x, x + 1) for x in range(branch_depths[0])]
    node_depths = list(range(branch_depths[0] + 1))
    nodes = branch_depths[0] + 1
    leaves = [nodes - 1]
    for branch in branch_depths[1:]:
        # Attach each further branch at a random node shallow enough that the
        # branch can still reach its required depth.
        junction = random.choice(
            [x for x in range(nodes) if node_depths[x] < branch])
        length = branch - node_depths[junction]
        edges.append((junction, nodes))
        node_depths.append(node_depths[junction] + 1)
        for nid, new_node in enumerate(range(nodes, nodes + length - 1)):
            edges.append((new_node, new_node + 1))
            node_depths.append(node_depths[junction] + 2 + nid)
        nodes += length
        leaves.append(nodes - 1)
    return edges, node_depths, leaves


def make_objectives(objective_depths, node_depths, leaves):
    """ Make num_objectives objectives at leaves with the specified depths.
        ([int], [int], [int]) -> [int]
    """
    objectives = []
    for obj_d in objective_depths:
        # Each objective must sit at a distinct leaf of exactly this depth.
        candidates = [n for n in leaves
                      if node_depths[n] == obj_d and n not in objectives]
        if not candidates:
            raise Exception("Not enough leaf nodes with depth " +
                str(obj_d) + " for objective.")
        objectives.append(random.choice(candidates))
    return objectives


def make_tanks_and_divers(node_depths, objectives, num_tank_adjustment,
    num_diver_adjustment, ordered_tanks):
    """ Make the required number of tank and diver objects.
        |Tanks| = 2^(depth+1). (1 dummy (if ordered))
        |Divers| = 2^(depth-1)
        Adjust the number of tanks and divers by num_tank_adjustment and
        num_diver_adjustment. If either of these numbers is negative, the
        problem will be unsatisfiable.
        ([int], [int], int, int, bool) -> [str], [str]
    """
    num_tanks = num_tank_adjustment - 1
    num_divers = num_diver_adjustment
    for obj in objectives:
        num_tanks += pow(2, node_depths[obj] + 1)
        num_divers += pow(2, node_depths[obj] - 1)
    tanks = ['t' + str(x) for x in range(num_tanks)]
    if ordered_tanks:
        # Sentinel so every real tank has a next-tank successor.
        tanks.append('dummy')
    divers = ['d' + str(x) for x in range(num_divers)]
    return tanks, divers


def make_positive_relationships(objectives, node_depths):
    """ Create a (transitively closed) graph of relationships showing which
        divers depend on other divers to deliver them tanks to satisfy the
        objectives.
        Process - Start at each objective and walk back to the entrance keeping
        a list of divers as we go. At each step we need 1 additional diver to
        service each diver in our list.
        ([int], [int]) -> set([(int, int)])
    """
    cur_d = 0
    pos_relationships = set()
    for obj in objectives:
        obj_divers = [cur_d]
        cur_d += 1
        depth = node_depths[obj] - 1
        while depth > 0:
            new_divers = list(range(cur_d, cur_d + len(obj_divers)))
            for diver in obj_divers:
                for n_diver in new_divers:
                    pos_relationships.add((n_diver, diver))
            obj_divers.extend(new_divers)
            cur_d += len(new_divers)
            depth -= 1
    return pos_relationships


def make_negative_relationships(pos_relationships, num_divers, neg_link_prob):
    """ Make a set of negative relationships where divers preclude each other.
        For the problem to be satisfiable there must just be an ordering over
        the divers that works. Lets assume that the positive relationships
        represent this order. We are then able to rule out everything else.
        In fact, we have a neg_link_prob chance of ruling out a non-positive
        link.
        (set([(int, int)]), int, float) -> { int : [int] }
    """
    neg_relationships = {x: [] for x in range(num_divers)}
    for (diver1, diver2) in itertools.combinations(range(num_divers), 2):
        if (diver1, diver2) not in pos_relationships and random.random() < neg_link_prob:
            neg_relationships[diver1].append(diver2)
    return neg_relationships


def add_neg_cycle(neg_relationships, num_divers, neg_cycle_frac):
    """ Adds a negative cycle to the diver relationships, making the problem
        have no solutions. num_divers * neg_cycle_frac (min 2) divers are
        involved.
        ({ int : [int] }, int, float) -> None
    """
    divers = random.sample(range(num_divers), max(2, int(num_divers * neg_cycle_frac)))
    for did, diver1 in enumerate(divers):
        diver2 = divers[(did + 1) % len(divers)]
        if diver2 not in neg_relationships[diver1]:
            neg_relationships[diver1].append(diver2)


def make_hiring_costs(neg_relationships, min_cost, max_cost, perturb):
    """ Make the hiring costs for the divers. The costs are inversely
        proportional to the number of negative relationships a diver has.
        They are perturbed by perturb.
        ({ int : [int] }, int, int, float) -> { int : int }
    """
    # keys() and values() iterate in the same (insertion) order in Python 3,
    # so divers[nid] still matches num_rels[nid] below.
    divers = list(neg_relationships.keys())
    num_rels = [len(x) for x in neg_relationships.values()]
    min_rels, max_rels = min(num_rels), max(num_rels)
    rel_range = max_rels - min_rels
    cost_range = max_cost - min_cost
    if cost_range == 0:
        return {d: min_cost for d in divers}
    if rel_range == 0:
        mid_cost = int(min_cost + cost_range / 2.0)
        return {d: mid_cost for d in divers}
    # Bucket divers by their number of negative relationships.
    rel_dict = {n: [] for n in num_rels}
    for nid, num in enumerate(num_rels):
        rel_dict[num].append(divers[nid])
    # Most-constrained divers first -> cheapest cost band first.
    sorted_rels = sorted(rel_dict.items())
    sorted_rels.reverse()
    hiring_costs = {}
    cost_inc = cost_range / float(len(sorted_rels))
    for rid, (nr, nr_d) in enumerate(sorted_rels):
        for d in nr_d:
            base_cost = min_cost + cost_inc * rid
            # Symmetric random perturbation of +-perturb, clamped to range.
            base_cost += random.random() * 2 * perturb * base_cost - perturb * base_cost
            base_cost = max(min_cost, min(max_cost, int(base_cost)))
            hiring_costs[d] = base_cost
    return hiring_costs


def write_domain_file(file_name, divers, neg_relationships, strips, ordered_tanks):
    """ Write the PDDL domain file to file_name.
        (str, [str], { int : [int] }, bool, bool) -> None
    """
    try:
        output_file = open(file_name, 'w')
        if strips:
            output_file.write(";; Cave Diving STRIPS\n")
        else:
            output_file.write(";; Cave Diving ADL\n")
        output_file.write(";; Authors: Nathan Robinson,\n")
        output_file.write(";;          Christian Muise, and\n")
        output_file.write(";;          Charles Gretton\n\n")
        if strips:
            output_file.write("(define (domain cave-diving-strips)\n")
        else:
            output_file.write("(define (domain cave-diving-adl)\n")
        output_file.write("  (:requirements :typing)\n")
        output_file.write("  (:types location diver tank quantity)\n")
        output_file.write("  (:predicates\n")
        output_file.write("    (at-tank ?t - tank ?l - location)\n")
        output_file.write("    (in-storage ?t - tank)\n")
        output_file.write("    (full ?t - tank)\n")
        if ordered_tanks:
            output_file.write("    (next-tank ?t1 - tank ?t2 - tank)\n")
        output_file.write("    (at-diver ?d - diver ?l - location)\n")
        output_file.write("    (available ?d - diver)\n")
        output_file.write("    (at-surface ?d - diver)\n")
        output_file.write("    (decompressing ?d - diver)\n")
        if not strips:
            output_file.write("    (precludes ?d1 - diver ?d2 - diver)\n")
        output_file.write("    (cave-entrance ?l - location)\n")
        output_file.write("    (connected ?l1 - location ?l2 - location)\n")
        output_file.write("    (next-quantity ?q1 - quantity ?q2 - quantity)\n")
        output_file.write("    (holding ?d - diver ?t - tank)\n")
        output_file.write("    (capacity ?d - diver ?q - quantity)\n")
        output_file.write("    (have-photo ?l - location)\n")
        output_file.write("    (in-water )\n")
        output_file.write("  )\n\n")
        # Cost machinery deliberately disabled in the original generator:
        #output_file.write("  (:functions\n")
        #output_file.write("    (hiring-cost ?d - diver) - number\n")
        #output_file.write("    (other-cost) - number\n")
        #output_file.write("    (total-cost) - number\n")
        #output_file.write("  )\n\n")
        # Divers are made constants given the strips usage
        num_diver_lines = len(divers) // 20 + 1
        ordered_divers = ['d' + str(x) for x in range(len(divers))]
        output_file.write("  (:constants\n")
        for d_line in range(num_diver_lines):
            output_file.write("    " + " ".join(ordered_divers[(d_line * 20):(d_line * 20 + 20)]) + " - diver\n")
        output_file.write("  )\n\n")
        if not strips:
            output_file.write("  (:action hire-diver\n")
            output_file.write("    :parameters (?d1 - diver)\n")
            output_file.write("    :precondition (and      (available ?d1)\n")
            output_file.write("                       (not (in-water)) \n")
            output_file.write("                  )\n")
            output_file.write("    :effect (and (at-surface ?d1)\n")
            output_file.write("                 (not (available ?d1))\n")
            output_file.write("                 (forall (?d2 - diver)\n")
            output_file.write("                     (when (precludes ?d1 ?d2) (not (available ?d2))))\n")
            output_file.write("                 (in-water)\n")
            output_file.write("                 (increase (total-cost) (hiring-cost ?d1))\n")
            output_file.write("            )\n")
            output_file.write("  )\n\n")
        else:
            # STRIPS has no forall/when, so emit one ground action per diver.
            for did, diver1 in enumerate(divers):
                output_file.write("  (:action hire-diver-" + diver1 + "\n")
                output_file.write("    :parameters ( )\n")
                output_file.write("    :precondition (and (available " + diver1 + "))\n")
                output_file.write("    :effect (and (at-surface " + diver1 + ")\n")
                output_file.write("                 (not (available " + diver1 + "))\n")
                #for diver2 in neg_relationships[did]:
                #    output_file.write("                 (not (available " + divers[diver2] + "))\n")
                #output_file.write("                 (increase (total-cost) (hiring-cost " + diver1 + "))\n")
                output_file.write("            )\n")
                output_file.write("  )\n\n")
        output_file.write("  (:action prepare-tank\n")
        if ordered_tanks:
            output_file.write("    :parameters (?d - diver ?t1 ?t2 - tank ?q1 ?q2 - quantity)\n")
        else:
            output_file.write("    :parameters (?d - diver ?t1 - tank ?q1 ?q2 - quantity)\n")
        output_file.write("    :precondition (and (at-surface ?d)\n")
        output_file.write("                       (in-storage ?t1)\n")
        output_file.write("                       (next-quantity ?q1 ?q2)\n")
        output_file.write("                       (capacity ?d ?q2)\n")
        if ordered_tanks:
            output_file.write("                       (next-tank ?t1 ?t2)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (in-storage ?t1))\n")
        output_file.write("                 (not (capacity ?d ?q2))\n")
        if ordered_tanks:
            output_file.write("                      (in-storage ?t2)\n")
        output_file.write("                      (full ?t1)\n")
        output_file.write("                      (capacity ?d ?q1)\n")
        output_file.write("                      (holding ?d ?t1)\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write("  (:action enter-water\n")
        output_file.write("    :parameters (?d - diver ?l - location)\n")
        output_file.write("    :precondition (and (at-surface ?d)\n")
        output_file.write("                       (cave-entrance ?l)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (at-surface ?d))\n")
        output_file.write("                      (at-diver ?d ?l)\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write("  (:action pickup-tank\n")
        output_file.write("    :parameters (?d - diver ?t - tank ?l - location ?q1 ?q2 - quantity)\n")
        output_file.write("    :precondition (and (at-diver ?d ?l)\n")
        output_file.write("                       (at-tank ?t ?l)\n")
        output_file.write("                       (next-quantity ?q1 ?q2)\n")
        output_file.write("                       (capacity ?d ?q2)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (at-tank ?t ?l))\n")
        output_file.write("                 (not (capacity ?d ?q2))\n")
        output_file.write("                      (holding ?d ?t)\n")
        output_file.write("                      (capacity ?d ?q1)\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write("  (:action drop-tank\n")
        output_file.write("    :parameters (?d - diver ?t - tank ?l - location ?q1 ?q2 - quantity)\n")
        output_file.write("    :precondition (and (at-diver ?d ?l)\n")
        output_file.write("                       (holding ?d ?t)\n")
        output_file.write("                       (next-quantity ?q1 ?q2)\n")
        output_file.write("                       (capacity ?d ?q1)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (holding ?d ?t))\n")
        output_file.write("                 (not (capacity ?d ?q1))\n")
        output_file.write("                      (at-tank ?t ?l)\n")
        output_file.write("                      (capacity ?d ?q2)\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write("  (:action swim\n")
        output_file.write("    :parameters (?d - diver ?t - tank ?l1 ?l2 - location)\n")
        output_file.write("    :precondition (and (at-diver ?d ?l1)\n")
        output_file.write("                       (holding ?d ?t)\n")
        output_file.write("                       (full ?t)\n")
        output_file.write("                       (connected ?l1 ?l2)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (at-diver ?d ?l1))\n")
        output_file.write("                 (not (full ?t))\n")
        output_file.write("                      (at-diver ?d ?l2)\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write("  (:action photograph\n")
        output_file.write("    :parameters (?d - diver ?l - location ?t - tank)\n")
        output_file.write("    :precondition (and (at-diver ?d ?l)\n")
        output_file.write("                       (holding ?d ?t)\n")
        output_file.write("                       (full ?t)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (full ?t))\n")
        output_file.write("                      (have-photo ?l)\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write("  (:action decompress\n")
        output_file.write("    :parameters (?d - diver ?l - location)\n")
        output_file.write("    :precondition (and (at-diver ?d ?l)\n")
        output_file.write("                       (cave-entrance ?l)\n")
        output_file.write("                  )\n")
        output_file.write("    :effect (and (not (at-diver ?d ?l))\n")
        output_file.write("                      (decompressing ?d)\n")
        output_file.write("                 (not (in-water))\n")
        #output_file.write("                 (increase (total-cost) (other-cost ))\n")
        output_file.write("            )\n")
        output_file.write("  )\n\n")
        output_file.write(")\n")
        output_file.close()
    except IOError:
        print("Error: could not write to the domain file:", file_name)


def write_problem_file(file_name, problem_name, num_locations, tanks, divers,
    objectives, edges, neg_relationships, hiring_costs, other_action_cost,
    strips, ordered_tanks):
    """ Write the PDDL problem file to file_name.
        (str, str, int, [str], [str], [int], [(int, int)], { int : [int] },
            { int : int }, int, bool, bool) -> None
    """
    try:
        output_file = open(file_name, 'w')
        if strips:
            output_file.write(";; Cave Diving STRIPS\n")
        else:
            output_file.write(";; Cave Diving ADL\n")
        output_file.write(";; Authors: Nathan Robinson,\n")
        output_file.write(";;          Christian Muise, and\n")
        output_file.write(";;          Charles Gretton\n\n")
        if strips:
            output_file.write("(define (problem cave-diving-strips-" + problem_name + ")\n")
            output_file.write("  (:domain cave-diving-strips)\n")
        else:
            output_file.write("(define (problem cave-diving-adl-" + problem_name + ")\n")
            output_file.write("  (:domain cave-diving-adl)\n")
        output_file.write("  (:objects\n")
        output_file.write("    " + " ".join(
            ['l' + str(x) for x in range(num_locations)]) + " - location\n")
        num_diver_lines = len(divers) // 20 + 1
        ordered_divers = ['d' + str(x) for x in range(len(divers))]
        # Divers are emitted as domain constants, not problem objects:
        #for d_line in range(num_diver_lines):
        #    output_file.write("    " + " ".join(ordered_divers[(d_line*20):(d_line*20+20)]) + " - diver\n")
        num_tank_lines = len(tanks) // 20 + 1
        for t_line in range(num_tank_lines):
            output_file.write("    " + " ".join(tanks[(t_line * 20):(t_line * 20 + 20)]) + " - tank\n")
        output_file.write("    zero one two three four - quantity\n")
        output_file.write("  )\n\n")
        output_file.write("  (:init\n")
        for diver in ordered_divers:
            output_file.write("    (available " + diver + ")\n")
        for diver in ordered_divers:
            output_file.write("    (capacity " + diver + " four)\n")
        if ordered_tanks:
            output_file.write("    (in-storage " + tanks[0] + ")\n")
            for tid, tank in enumerate(tanks[:-1]):
                output_file.write("    (next-tank " + tank + " " + tanks[tid + 1] + ")\n")
        else:
            for tank in tanks:
                output_file.write("    (in-storage " + tank + ")\n")
        output_file.write("    (cave-entrance l0)\n")
        for edge in edges:
            output_file.write("    (connected l" + str(edge[0]) + " l" + str(edge[1]) + ")\n")
            output_file.write("    (connected l" + str(edge[1]) + " l" + str(edge[0]) + ")\n")
        output_file.write("    (next-quantity zero one)\n")
        output_file.write("    (next-quantity one two)\n")
        output_file.write("    (next-quantity two three)\n")
        output_file.write("    (next-quantity three four)\n")
        if not strips:
            for did1, diver1 in enumerate(divers):
                for diver2 in neg_relationships[did1]:
                    output_file.write("    (precludes " + diver1 + " " + divers[diver2] + ")\n")
        # Cost facts deliberately disabled in the original generator:
        #for did, diver in enumerate(divers):
        #    output_file.write("    (= (hiring-cost " + diver + ") " + str(hiring_costs[did]) + ")\n")
        #output_file.write("    (= (other-cost ) " + str(other_action_cost) + ")\n")
        #output_file.write("    (= (total-cost) 0)\n")
        output_file.write("  )\n\n")
        output_file.write("  (:goal\n")
        output_file.write("    (and\n")
        for obj in objectives:
            output_file.write("      (have-photo l" + str(obj) + ")\n")
        for diver in divers:
            output_file.write("      (decompressing " + diver + ")\n")
        output_file.write("    )\n  )\n\n")
        #output_file.write("  (:metric minimize (total-cost))\n\n")
        output_file.write(")\n")
        output_file.close()
    except IOError:
        print("Error: could not write to the problem file:", file_name)


def main():
    """ Parse the command line, generate a random instance, and write the
        PDDL domain/problem files. """
    # Project-local CLI modules are imported here (not at module level) so the
    # pure generation helpers above remain importable on their own.
    import generator_cmd_line
    from cmd_line import InputException  # noqa: F401 -- kept from the original imports
    args = generator_cmd_line.process_args()
    random.seed(args.seed)
    edges, node_depths, leaves = make_caves(args.cave_branches)
    objectives = make_objectives(args.objectives, node_depths, leaves)
    tanks, divers = make_tanks_and_divers(node_depths, objectives,
        args.num_tank_adjustment, args.num_diver_adjustment, args.order_tanks)
    pos_relationships = make_positive_relationships(objectives, node_depths)
    neg_relationships = make_negative_relationships(pos_relationships,
        len(divers), args.neg_link_prob)
    if args.neg_cycle_length:
        add_neg_cycle(neg_relationships, len(divers), args.neg_cycle_length)
    hiring_costs = make_hiring_costs(neg_relationships, args.minimum_hiring_cost,
        args.maximum_hiring_cost, args.perturb_hiring_costs)
    random.shuffle(divers)
    if not args.quiet:
        print()
        print("Edges: ", ", ".join(map(str, edges)))
        print("Depths:", ", ".join(map(str, node_depths)))
        print("Objectives:", ", ".join(map(str, objectives)))
        print("Tanks:", ", ".join(map(str, tanks)))
        print("Divers:", ", ".join(map(str, divers)))
        print("Positive relationships:", ", ".join(map(str, pos_relationships)))
        print("Negative relationships:", neg_relationships)
        print("Hiring costs:", hiring_costs)
    if args.domain_file_name:
        write_domain_file(args.domain_file_name, divers, neg_relationships,
            args.strips, args.order_tanks)
    write_problem_file(args.problem_file_name, args.problem_name, len(node_depths),
        tanks, divers, objectives, edges, neg_relationships, hiring_costs,
            args.other_action_cost, args.strips, args.order_tanks)


if __name__ == "__main__":
    # NOTE(review): the guard body was elided in the source scrape; main() is
    # the only plausible entry point -- confirm against the original file.
    main()
Source:milestone5.py  
#!/usr/bin/python
import sys, string
from random import choice
import random
from string import ascii_lowercase
from scipy.stats import beta, uniform
import numpy as np
import struct
import pandas as pd
import math
import data_gen_utils

# note this is the base path to the data files we generate
TEST_BASE_DIR = "/cs165/generated_data"
# note this is the base path that _POINTS_ to the data files we generate
DOCKER_TEST_BASE_DIR = "/cs165/staff_test"

############################################################################
# Notes: You can generate your own scripts for generating data fairly easily by modifying this script.
#
############################################################################


def generateDataMilestone5(dataSize):
    """Generate dataSize rows of 4-column random data, write them to
    data5.csv, and return the table as a DataFrame."""
    outputFile = TEST_BASE_DIR + '/data5.csv'
    header_line = data_gen_utils.generateHeaderLine('db1', 'tbl5', 4)
    # // keeps the randint bound an int under Python 3 (original used '/').
    outputTable = pd.DataFrame(np.random.randint(0, dataSize // 5, size=(dataSize, 4)),
                               columns=['col1', 'col2', 'col3', 'col4'])
    # This is going to have many, many duplicates for large tables!!!!
    outputTable['col1'] = np.random.randint(0, 1000, size=(dataSize))
    outputTable['col2'] = np.random.randint(0, 1000, size=(dataSize))
    outputTable['col3'] = np.random.randint(0, 10000, size=(dataSize))
    outputTable['col4'] = np.random.randint(0, 10000, size=(dataSize))
    # lineterminator: renamed from line_terminator in pandas 1.5+.
    outputTable.to_csv(outputFile, sep=',', index=False, header=header_line,
                       lineterminator='\n')
    return outputTable


def createTest38(dataTable):
    """Write test 38 (inserts into tbl5) and return the updated dataTable."""
    # prelude
    output_file, exp_output_file = data_gen_utils.openFileHandles(38, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Correctness test: Do inserts in tbl5.\n')
    output_file.write('--\n')
    output_file.write('-- Let table tbl5 have a secondary index (col2) and a clustered index (col3), so, all should be maintained when we insert new data.\n')
    output_file.write('-- This means that the table should be always sorted on col3 and the secondary indexes on col2 should be updated\n')
    output_file.write('--\n')
    output_file.write('-- Create Table\n')
    output_file.write('create(tbl,"tbl5",db1,4)\n')
    output_file.write('create(col,"col1",db1.tbl5)\n')
    output_file.write('create(col,"col2",db1.tbl5)\n')
    output_file.write('create(col,"col3",db1.tbl5)\n')
    output_file.write('create(col,"col4",db1.tbl5)\n')
    output_file.write('-- Create a clustered index on col1\n')
    output_file.write('create(idx,db1.tbl5.col1,sorted,clustered)\n')
    output_file.write('-- Create an unclustered btree index on col2\n')
    output_file.write('create(idx,db1.tbl5.col2,btree,unclustered)\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('-- Load data immediately in the form of a clustered index\n')
    output_file.write('load(\"' + DOCKER_TEST_BASE_DIR + '/data5.csv\")\n')
    output_file.write('--\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-1,-11,-111,-1111);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-2,-22,-222,-2222);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-3,-33,-333,-2222);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-4,-44,-444,-2222);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-5,-55,-555,-2222);\n')
    output_file.write('--\n')
    output_file.write('relational_insert(db1.tbl5,-1,-11,-111,-1111)\n')
    output_file.write('relational_insert(db1.tbl5,-2,-22,-222,-2222)\n')
    output_file.write('relational_insert(db1.tbl5,-3,-33,-333,-2222)\n')
    output_file.write('relational_insert(db1.tbl5,-4,-44,-444,-2222)\n')
    output_file.write('relational_insert(db1.tbl5,-5,-55,-555,-2222)\n')
    #output_file.write('shutdown\n')
    # update dataTable -- pd.concat replaces DataFrame.append (removed in
    # pandas 2.0); same rows appended with a fresh index.
    new_rows = pd.DataFrame([
        {"col1": -1, "col2": -11, "col3": -111, "col4": -1111},
        {"col1": -2, "col2": -22, "col3": -222, "col4": -2222},
        {"col1": -3, "col2": -33, "col3": -333, "col4": -2222},
        {"col1": -4, "col2": -44, "col3": -444, "col4": -2222},
        {"col1": -5, "col2": -55, "col3": -555, "col4": -2222},
    ])
    dataTable = pd.concat([dataTable, new_rows], ignore_index=True)

    # no expected results
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
    return dataTable


def createTest39(dataTable, approxSelectivity):
    """Write test 39 (selects over the indexed columns) plus its expected output."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(39, TEST_DIR=TEST_BASE_DIR)
    dataSize = len(dataTable)
    offset = int(approxSelectivity * dataSize)
    highestHighVal = int((dataSize / 2) - offset)  # kept from original (unused)
    selectValLess = np.random.randint(-55, -11)
    selectValGreater = selectValLess + offset
    selectValLess2 = np.random.randint(-10, 0)
    selectValGreater2 = selectValLess2 + offset
    output_file.write('-- Correctness test: Test for updates on columns with index\n')
    output_file.write('--\n')
    output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format(selectValLess, selectValGreater))
    output_file.write('--\n')
    output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format(selectValLess, selectValGreater))
    output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')
    output_file.write('print(f1)\n')
    output_file.write('--\n')
    output_file.write('-- SELECT col3 FROM tbl5 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess2, selectValGreater2))
    output_file.write('--\n')
    output_file.write('s2=select(db1.tbl5.col1,{},{})\n'.format(selectValLess2, selectValGreater2))
    output_file.write('f2=fetch(db1.tbl5.col3,s2)\n')
    output_file.write('print(f2)\n')
    # generate expected results
    dfSelectMaskGT = dataTable['col2'] >= selectValLess
    dfSelectMaskLT = dataTable['col2'] < selectValGreater
    output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col1']
    if len(output) > 0:
        exp_output_file.write(output.to_string(header=False, index=False))
        exp_output_file.write('\n\n')
    dfSelectMaskGT2 = dataTable['col1'] >= selectValLess2
    dfSelectMaskLT2 = dataTable['col1'] < selectValGreater2
    output = dataTable[dfSelectMaskGT2 & dfSelectMaskLT2]['col3']
    if len(output) > 0:
        exp_output_file.write(output.to_string(header=False, index=False))
        exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)


def createTests40(dataTable):
    """Write test 40 (relational updates) and return the updated dataTable."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(40, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Correctness test: Update values\n')
    output_file.write('--\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -10 WHERE col1 = -1;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -20 WHERE col2 = -22;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -30 WHERE col1 = -3;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -40 WHERE col3 = -444;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -50 WHERE col1 = -5;\n')
    output_file.write('--\n')
    output_file.write('u1=select(db1.tbl5.col1,-1,0)\n')
    output_file.write('relational_update(db1.tbl5.col1,u1,-10)\n')
    output_file.write('u2=select(db1.tbl5.col2,-22,-21)\n')
    output_file.write('relational_update(db1.tbl5.col1,u2,-20)\n')
    output_file.write('u3=select(db1.tbl5.col1,-3,-2)\n')
    output_file.write('relational_update(db1.tbl5.col1,u3,-30)\n')
    output_file.write('u4=select(db1.tbl5.col3,-444,-443)\n')
    output_file.write('relational_update(db1.tbl5.col1,u4,-40)\n')
    output_file.write('u5=select(db1.tbl5.col1,-5,-4)\n')
    output_file.write('relational_update(db1.tbl5.col1,u5,-50)\n')
    output_file.write('shutdown\n')
    # update dataTable to mirror the five updates above
    dfSelectMaskEq = dataTable['col1'] == -1
    dataTable.loc[dfSelectMaskEq, 'col1'] = -10
    dfSelectMaskEq = dataTable['col2'] == -22
    dataTable.loc[dfSelectMaskEq, 'col1'] = -20

    dfSelectMaskEq = dataTable['col1'] == -3
    dataTable.loc[dfSelectMaskEq, 'col1'] = -30

    dfSelectMaskEq = dataTable['col3'] == -444
    dataTable.loc[dfSelectMaskEq, 'col1'] = -40

    dfSelectMaskEq = dataTable['col1'] == -5
    dataTable.loc[dfSelectMaskEq, 'col1'] = -50
    # no expected results
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
    return dataTable


def createTest41(dataTable):
    """Write test 41 (select after inserts and updates) plus expected output."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(41, TEST_DIR=TEST_BASE_DIR)
    selectValLess = np.random.randint(-200, -100)
    selectValGreater = np.random.randint(10, 100)
    output_file.write('-- Correctness test: Run query after inserts and updates\n')
    output_file.write('--\n')
    output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format(selectValLess, selectValGreater))
    output_file.write('--\n')
    output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format(selectValLess, selectValGreater))
    output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')
    output_file.write('print(f1)\n')
    # generate expected results
    dfSelectMask = (dataTable['col2'] >= selectValLess) & (dataTable['col2'] < selectValGreater)
    output = dataTable[dfSelectMask]['col1']
    exp_output_file.write(output.to_string(header=False, index=False))
    data_gen_utils.closeFileHandles(output_file, exp_output_file)


def createTest42(dataTable):
    """Write test 42 (deletes, then queries).

    NOTE(review): the source chunk is truncated partway through this
    function; only the visible statements are reproduced below. Recover the
    remainder (further deletes, expected results, file-handle close) from
    the original file before relying on this.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(42, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Correctness test: Delete values and run queries after inserts, updates, and deletes\n')
    output_file.write('--\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col1 = -10;\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col2 = -22;\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col1 = -30;\n')
output_file.write('-- DELETE FROM tbl5 WHERE col3 = -444;\n')175    output_file.write('-- DELETE FROM tbl5 WHERE col1 = -50;\n')176    output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= -100 AND col2 < 20;\n')177    output_file.write('--\n')178    output_file.write('d1=select(db1.tbl5.col1,-10,-9)\n')179    output_file.write('relational_delete(db1.tbl5,d1)\n')180    output_file.write('d2=select(db1.tbl5.col2,-22,-21)\n')181    output_file.write('relational_delete(db1.tbl5,d2)\n')182    output_file.write('d3=select(db1.tbl5.col1,-30,-29)\n')183    output_file.write('relational_delete(db1.tbl5,d3)\n')184    output_file.write('d4=select(db1.tbl5.col3,-444,-443)\n')185    output_file.write('relational_delete(db1.tbl5,d4)\n')186    output_file.write('d5=select(db1.tbl5.col1,-50,-49)\n')187    output_file.write('relational_delete(db1.tbl5,d5)\n')188    output_file.write('s1=select(db1.tbl5.col2,-100,20)\n')189    output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')190    output_file.write('print(f1)\n')191    # update dataTable192    dataTable = dataTable[dataTable.col1!=-10]193    dataTable = dataTable[dataTable.col2!=-22]194    dataTable = dataTable[dataTable.col1!=-30]195    dataTable = dataTable[dataTable.col3!=-444]196    dataTable = dataTable[dataTable.col1!=-50]197    198    dfSelectMask1=dataTable['col2']>=-100 199    dfSelectMask2=dataTable['col2']<20200    output = dataTable[dfSelectMask1 & dfSelectMask2]['col1']201    if len(output) > 0:202        exp_output_file.write(output.to_string(header=False,index=False))203        exp_output_file.write('\n')204    data_gen_utils.closeFileHandles(output_file, exp_output_file)205    return dataTable206def createRandomUpdates(dataTable, numberOfUpdates, output_file):207    dataSize = len(dataTable)208    for i in range(numberOfUpdates):209        updatePos = np.random.randint(1, dataSize-1)210        col2Val = dataTable.values[updatePos][1]211        col1Val = dataTable.values[updatePos][0]212        
output_file.write('-- UPDATE tbl5 SET col1 = {} WHERE col2 = {};\n'.format(col1Val+1, col2Val))213        output_file.write('u1=select(db1.tbl5.col2,{},{})\n'.format(col2Val, col2Val+1))214        output_file.write('relational_update(db1.tbl5.col1,u1,{})\n'.format(col1Val+1))215        output_file.write('--\n')216        dfSelectMaskEq = dataTable['col2'] == col2Val217        dataTable.loc[dfSelectMaskEq,'col1']=col1Val+1218    return dataTable219def createRandomDeletes(dataTable, numberOfUpdates, output_file):220    for i in range(numberOfUpdates):221        dataSize = len(dataTable)222        updatePos = np.random.randint(1, dataSize-1)223        col1Val = dataTable.values[updatePos][0]224        output_file.write('-- DELETE FROM tbl5 WHERE col1 = {};\n'.format(col1Val))225        output_file.write('d1=select(db1.tbl5.col1,{},{})\n'.format(col1Val, col1Val+1))226        output_file.write('relational_delete(db1.tbl5,d1)\n')227        output_file.write('--\n')228        dataTable = dataTable[dataTable.col1!=col1Val]229    return dataTable230def createRandomInserts(dataTable, numberOfInserts, output_file):231    for i in range(numberOfInserts):232        col1Val = np.random.randint(0,1000)233        col2Val = np.random.randint(0,1000)234        col3Val = np.random.randint(0,10000)235        col4Val = np.random.randint(0,10000)236        output_file.write('-- INSERT INTO tbl5 VALUES ({},{},{},{});\n'.format(col1Val, col2Val, col3Val, col4Val))237        output_file.write('relational_insert(db1.tbl5,{},{},{},{})\n'.format(col1Val, col2Val, col3Val, col4Val))238        dataTable = dataTable.append({"col1":col1Val, "col2":col2Val, "col3": col3Val, "col4": col4Val}, ignore_index = True)239        output_file.write('--\n')240    return dataTable241def createRandomSelects(dataTable, numberOfQueries, output_file, exp_output_file):242    lowestVal = dataTable['col2'].min()243    highestVal = dataTable['col2'].max()244    dataSize = len(dataTable)245    for i in 
range(numberOfQueries):246        selectValLess = np.random.randint(lowestVal-1, highestVal-1)247        selectValGreater = np.random.randint(selectValLess, highestVal)248        output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format(selectValLess, selectValGreater))249        output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format(selectValLess, selectValGreater))250        output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')251        output_file.write('print(f1)\n')252        dfSelectMaskGT = dataTable['col2'] >= selectValLess253        dfSelectMaskLT = dataTable['col2'] < selectValGreater254        output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col1']255        if len(output) > 0:256            exp_output_file.write(output.to_string(header=False,index=False))257            exp_output_file.write('\n')258        259def createTest43(dataTable):260    output_file, exp_output_file = data_gen_utils.openFileHandles(43, TEST_DIR=TEST_BASE_DIR)261    output_file.write('-- Scalability test: A large number of inserts, deletes and updates, followed by a number of queries\n')262    output_file.write('--\n')263    dataTable = createRandomInserts(dataTable, 100, output_file)264    dataTable = createRandomUpdates(dataTable, 100, output_file)265    dataTable = createRandomDeletes(dataTable, 100, output_file)266    createRandomSelects(dataTable, 5, output_file, exp_output_file)267    data_gen_utils.closeFileHandles(output_file, exp_output_file)268def generateMilestoneFiveFiles(dataSize,randomSeed=47):269    np.random.seed(randomSeed)270    dataTable = generateDataMilestone5(dataSize)271    dataTable = createTest38(dataTable)272    createTest39(dataTable, 0.1)273    dataTable = createTests40(dataTable)274    createTest41(dataTable)275    dataTable = createTest42(dataTable)276    createTest43(dataTable)277def main(argv):278    global TEST_BASE_DIR279    global DOCKER_TEST_BASE_DIR280    dataSize = int(argv[0])281    if len(argv) > 1:282        
randomSeed = int(argv[1])283    else:284        randomSeed = 47285    286    if len(argv) > 2:287        TEST_BASE_DIR = argv[2]288        if len(argv) > 3:289            DOCKER_TEST_BASE_DIR = argv[3]290    generateMilestoneFiveFiles(dataSize, randomSeed=randomSeed)291if __name__ == "__main__":...milestone2.py
Source:milestone2.py  
#!/usr/bin/python
"""Generate CS165 milestone-2 (shared scans / batched queries) test files."""
import sys, string
from random import choice
import random
from string import ascii_lowercase
from scipy.stats import beta, uniform
import numpy as np
import struct
import pandas as pd
import data_gen_utils

# note this is the base path where we store the data files we generate
TEST_BASE_DIR = "/cs165/generated_data"
# note this is the base path that _POINTS_ to the data files we generate
DOCKER_TEST_BASE_DIR = "/cs165/staff_test"

#
# Example usage:
#   python milestone2.py 10000 42 ~/repo/cs165-docker-test-runner/test_data /cs165/staff_test
#
############################################################################
# Notes: You can generate your own scripts for generating data fairly easily by modifying this script.
#
# To test functionality and speed, run your tests first on small data. Then when you are reasonably
# confident that your code works, move to bigger data sizes for speed.
############################################################################

def generateDataMilestone2(dataSize):
    """Write data3_batch.csv with dataSize rows x 4 int columns; return the DataFrame."""
    outputFile = TEST_BASE_DIR + '/data3_batch.csv'
    header_line = data_gen_utils.generateHeaderLine('db1', 'tbl3_batch', 4)
    # int() on the upper bound: np.random.randint requires integer bounds.
    outputTable = pd.DataFrame(np.random.randint(0, int(dataSize / 5), size=(dataSize, 4)),
                               columns=['col1', 'col2', 'col3', 'col4'])
    # This is going to have many, many duplicates for large tables!!!!
    outputTable['col1'] = np.random.randint(0, 1000, size=(dataSize))
    outputTable['col4'] = np.random.randint(0, 10000, size=(dataSize))
    outputTable['col4'] = outputTable['col4'] + outputTable['col1']
    outputTable.to_csv(outputFile, sep=',', index=False, header=header_line, line_terminator='\n')
    return outputTable

def createTestTen():
    """Emit test 10: create tbl3_batch, load the CSV, and shut down (durability check)."""
    # prelude
    output_file, exp_output_file = data_gen_utils.openFileHandles(10, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Load Test Data 2\n')
    output_file.write('-- Create a table to run batch queries on\n')
    output_file.write('--\n')
    # query
    output_file.write('-- Loads data from: data3_batch.csv\n')
    output_file.write('--\n')
    output_file.write('-- Create Table\n')
    output_file.write('create(tbl,"tbl3_batch",db1,4)\n')
    output_file.write('create(col,"col1",db1.tbl3_batch)\n')
    output_file.write('create(col,"col2",db1.tbl3_batch)\n')
    output_file.write('create(col,"col3",db1.tbl3_batch)\n')
    output_file.write('create(col,"col4",db1.tbl3_batch)\n')
    output_file.write('--\n')
    output_file.write('-- Load data immediately\n')
    output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data3_batch.csv\")\n')
    output_file.write('--\n')
    output_file.write('-- Testing that the data is durable on disk.\n')
    output_file.write('shutdown\n')
    # no expected results
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestEleven(dataTable):
    """Emit test 11: two batched selects with NO overlapping ranges."""
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(11, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- 2 queries with NO overlap\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 10 AND col1 < 20;\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    output_file.write('s1=select(db1.tbl3_batch.col1,10,20)\n')
    output_file.write('s2=select(db1.tbl3_batch.col1,800,830)\n')
    output_file.write('batch_execute()\n')
    output_file.write('f1=fetch(db1.tbl3_batch.col4,s1)\n')
    output_file.write('f2=fetch(db1.tbl3_batch.col4,s2)\n')
    output_file.write('print(f1)\n')
    output_file.write('print(f2)\n')
    # generate expected results
    dfSelectMask1 = (dataTable['col1'] >= 10) & (dataTable['col1'] < 20)
    dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    output1 = dataTable[dfSelectMask1]['col4']
    output2 = dataTable[dfSelectMask2]['col4']
    exp_output_file.write(data_gen_utils.outputPrint(output1))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(output2))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestTwelve(dataTable):
    """Emit test 12: two batched selects with PARTIALLY overlapping ranges."""
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(12, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- 2 queries with partial overlap\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 600 AND col1 < 820;\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    output_file.write('s1=select(db1.tbl3_batch.col1,600,820)\n')
    output_file.write('s2=select(db1.tbl3_batch.col1,800,830)\n')
    output_file.write('batch_execute()\n')
    output_file.write('f1=fetch(db1.tbl3_batch.col4,s1)\n')
    output_file.write('f2=fetch(db1.tbl3_batch.col4,s2)\n')
    output_file.write('print(f1)\n')
    output_file.write('print(f2)\n')
    # generate expected results
    dfSelectMask1 = (dataTable['col1'] >= 600) & (dataTable['col1'] < 820)
    dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    output1 = dataTable[dfSelectMask1]['col4']
    output2 = dataTable[dfSelectMask2]['col4']
    exp_output_file.write(data_gen_utils.outputPrint(output1))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(output2))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestThirteen(dataTable):
    """Emit test 13: two batched selects where one range subsumes the other."""
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(13, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- 2 queries with full overlap (subsumption)\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 810 AND col1 < 820;\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    output_file.write('s1=select(db1.tbl3_batch.col1,810,820)\n')
    output_file.write('s2=select(db1.tbl3_batch.col1,800,830)\n')
    output_file.write('batch_execute()\n')
    output_file.write('f1=fetch(db1.tbl3_batch.col4,s1)\n')
    output_file.write('f2=fetch(db1.tbl3_batch.col4,s2)\n')
    output_file.write('print(f1)\n')
    output_file.write('print(f2)\n')
    # generate expected results
    dfSelectMask1 = (dataTable['col1'] >= 810) & (dataTable['col1'] < 820)
    dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    output1 = dataTable[dfSelectMask1]['col4']
    output2 = dataTable[dfSelectMask2]['col4']
    exp_output_file.write(data_gen_utils.outputPrint(output1))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(output2))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestFourteen(dataTable):
    """Emit test 14: ten batched selects over col4 with disjoint ranges."""
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(14, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- Queries with no overlap\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- 10 Queries of the type:\n')
    output_file.write('-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    for i in range(10):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, (1000 * i), (1000 * i) + 30))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # generate expected results
    for i in range(10):
        dfSelectMask = (dataTable['col4'] >= (1000 * i)) & (dataTable['col4'] < ((1000 * i) + 30))
        output = dataTable[dfSelectMask]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(output))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestFifteen(dataTable):
    """Emit test 15: ten batched selects over col4 with nested (subsumed) ranges."""
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(15, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- Queries with full overlap (subsumption)\n')
    output_file.write('--\n')
    randomVal = np.random.randint(1000, 9900)
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- 10 Queries of the type:\n')
    output_file.write('-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    for i in range(10):
        # Each successive range is strictly inside the previous one.
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, randomVal + (2 * i), randomVal + 60 - (2 * i)))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # generate expected results
    for i in range(10):
        dfSelectMask = (dataTable['col4'] >= (randomVal + (2 * i))) & (dataTable['col4'] < (randomVal + 60 - (2 * i)))
        output = dataTable[dfSelectMask]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(output))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTests16And17(dataTable, dataSize):
    """Emit tests 16 (unbatched) and 17 (batched): 100 identical selects each.

    1 / 1000 tuples should qualify on average, so that most time is spent on
    scans and not fetches or prints.
    """
    offset = np.max([1, int(dataSize / 5000)])
    # int() on the upper bound: np.random.randint requires integer bounds.
    query_starts = np.random.randint(0, int(dataSize / 8), size=(100))
    output_file16, exp_output_file16 = data_gen_utils.openFileHandles(16, TEST_DIR=TEST_BASE_DIR)
    output_file17, exp_output_file17 = data_gen_utils.openFileHandles(17, TEST_DIR=TEST_BASE_DIR)
    output_file16.write('--\n')
    output_file16.write('-- Control timing for without batching\n')
    output_file16.write('-- Queries for 16 and 17 are identical.\n')
    output_file16.write('-- Query in SQL:\n')
    output_file16.write('-- 100 Queries of the type:\n')
    output_file16.write('-- SELECT col3 FROM tbl3_batch WHERE col2 >= _ AND col2 < _;\n')
    output_file16.write('--\n')
    output_file17.write('--\n')
    output_file17.write('-- Same queries with batching\n')
    output_file17.write('-- Queries for 16 and 17 are identical.\n')
    output_file17.write('--\n')
    output_file17.write('batch_queries()\n')
    for i in range(100):
        output_file16.write('s{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, query_starts[i], query_starts[i] + offset))
        output_file17.write('s{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, query_starts[i], query_starts[i] + offset))
    output_file17.write('batch_execute()\n')
    for i in range(100):
        output_file16.write('f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i))
        output_file17.write('f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i))
    for i in range(100):
        output_file16.write('print(f{})\n'.format(i))
        output_file17.write('print(f{})\n'.format(i))
    # generate expected results (identical for both tests)
    for i in range(100):
        dfSelectMask = (dataTable['col2'] >= query_starts[i]) & ((dataTable['col2'] < (query_starts[i] + offset)))
        output = dataTable[dfSelectMask]['col3']
        exp_output_file16.write(data_gen_utils.outputPrint(output))
        exp_output_file16.write('\n\n')
        exp_output_file17.write(data_gen_utils.outputPrint(output))
        exp_output_file17.write('\n\n')
    data_gen_utils.closeFileHandles(output_file16, exp_output_file16)
    data_gen_utils.closeFileHandles(output_file17, exp_output_file17)

def generateMilestoneTwoFiles(dataSize, randomSeed):
    """Generate every milestone-2 test/expected-output file pair, reproducibly."""
    np.random.seed(randomSeed)
    dataTable = generateDataMilestone2(dataSize)
    createTestTen()
    createTestEleven(dataTable)
    createTestTwelve(dataTable)
    createTestThirteen(dataTable)
    createTestFourteen(dataTable)
    createTestFifteen(dataTable)
    createTests16And17(dataTable, dataSize)

def main(argv):
    """CLI entry point: argv = [dataSize, [randomSeed, [TEST_BASE_DIR, [DOCKER_TEST_BASE_DIR]]]]."""
    global TEST_BASE_DIR
    global DOCKER_TEST_BASE_DIR
    dataSize = int(argv[0])
    if len(argv) > 1:
        randomSeed = int(argv[1])
    else:
        randomSeed = 47
    # override the base directory for where to output test related files
    if len(argv) > 2:
        TEST_BASE_DIR = argv[2]
        if len(argv) > 3:
            DOCKER_TEST_BASE_DIR = argv[3]
    generateMilestoneTwoFiles(dataSize, randomSeed)

if __name__ == "__main__":
    # NOTE(review): the guard body was truncated in the recovered source;
    # main(sys.argv[1:]) matches the example usage above -- confirm.
    main(sys.argv[1:])
Source:h52vtp.py  
"""
    Convert h5 files to vtp files in VTK XML format that can be opened by ParaView.
    The data type of the vtp file is "vtkPolyData", each PolyData piece specifies a set
    of points and cells independently from the other pieces. The points are described
    explicitly by the Points element. The cells are described explicitly by the Verts,
    Lines, Strips, and Polys elements.
    <VTKFile type="PolyData" ...>
          <PolyData>
              <Piece NumberOfPoints="#" NumberOfVerts="#" NumberOfLines="#"
              NumberOfStrips="#" NumberOfPolys="#">
                  <PointData>...</PointData>
                  <CellData>...</CellData>
                  <Points>...</Points>
                  <Verts>...</Verts>
                  <Lines>...</Lines>
                  <Strips>...</Strips>
                  <Polys>...</Polys>
              </Piece>
          </PolyData>
    </VTKFile>
"""
import math
import argparse

import h5py
import numpy as np
from scipy import interpolate

def h5_to_vtp(surf_file, surf_name='train_loss', log=False, zmax=-1, interp=-1):
    """Convert one surface dataset from an h5 file into a .vtp PolyData file.

    surf_file -- path to the h5 file with 'xcoordinates'/'ycoordinates' grids
    surf_name -- name of the dataset holding the z values (e.g. 'train_loss')
    log       -- if True, write log(z + 0.1) instead of z
    zmax      -- if > 0, clamp z values above this threshold
    interp    -- if > 0, cubically interpolate the surface up to this resolution

    The output filename is derived from surf_file/surf_name plus suffixes for
    the zmax and log options. Assumes the grid is square (matrix_size is taken
    as sqrt of the point count).
    """
    # set this to True to generate points
    show_points = False
    # set this to True to generate polygons
    show_polys = True
    f = h5py.File(surf_file, 'r')
    [xcoordinates, ycoordinates] = np.meshgrid(f['xcoordinates'][:], f['ycoordinates'][:][:])
    vals = f[surf_name]
    x_array = xcoordinates[:].ravel()
    y_array = ycoordinates[:].ravel()
    z_array = vals[:].ravel()
    # Interpolate the resolution up to the desired amount
    if interp > 0:
        # NOTE(review): interp2d is deprecated in recent SciPy -- confirm the
        # pinned SciPy version still provides it.
        m = interpolate.interp2d(xcoordinates[0, :], ycoordinates[:, 0], vals, kind='cubic')
        x_array = np.linspace(min(x_array), max(x_array), interp)
        y_array = np.linspace(min(y_array), max(y_array), interp)
        z_array = m(x_array, y_array).ravel()
        x_array, y_array = np.meshgrid(x_array, y_array)
        x_array = x_array.ravel()
        y_array = y_array.ravel()
    vtp_file = surf_file + "_" + surf_name
    if zmax > 0:
        z_array[z_array > zmax] = zmax
        vtp_file += "_zmax=" + str(zmax)
    if log:
        z_array = np.log(z_array + 0.1)
        vtp_file += "_log"
    vtp_file += ".vtp"
    print("Here's your output file:{}".format(vtp_file))
    number_points = len(z_array)
    print("number_points = {} points".format(number_points))
    matrix_size = int(math.sqrt(number_points))
    print("matrix_size = {} x {}".format(matrix_size, matrix_size))
    poly_size = matrix_size - 1
    print("poly_size = {} x {}".format(poly_size, poly_size))
    number_polys = poly_size * poly_size
    print("number_polys = {}".format(number_polys))
    min_value_array = [min(x_array), min(y_array), min(z_array)]
    max_value_array = [max(x_array), max(y_array), max(z_array)]
    min_value = min(min_value_array)
    max_value = max(max_value_array)
    # Average the four corner z values of each grid cell for the CellData array.
    averaged_z_value_array = []
    poly_count = 0
    for column_count in range(poly_size):
        stride_value = column_count * matrix_size
        for row_count in range(poly_size):
            temp_index = stride_value + row_count
            averaged_z_value = (z_array[temp_index] + z_array[temp_index + 1] +
                                z_array[temp_index + matrix_size] +
                                z_array[temp_index + matrix_size + 1]) / 4.0
            averaged_z_value_array.append(averaged_z_value)
            poly_count += 1
    avg_min_value = min(averaged_z_value_array)
    avg_max_value = max(averaged_z_value_array)
    # Integer modulo results are compared with ==/!= below; the original used
    # `is`, which relies on CPython small-int caching and is a SyntaxWarning
    # on modern Python.
    output_file = open(vtp_file, 'w')
    output_file.write('<VTKFile type="PolyData" version="1.0" byte_order="LittleEndian" header_type="UInt64">\n')
    output_file.write('  <PolyData>\n')
    if (show_points and show_polys):
        output_file.write(
            '    <Piece NumberOfPoints="{}" NumberOfVerts="{}" NumberOfLines="0" NumberOfStrips="0" NumberOfPolys="{}">\n'.format(
                number_points, number_points, number_polys))
    elif (show_polys):
        output_file.write(
            '    <Piece NumberOfPoints="{}" NumberOfVerts="0" NumberOfLines="0" NumberOfStrips="0" NumberOfPolys="{}">\n'.format(
                number_points, number_polys))
    else:
        output_file.write(
            '    <Piece NumberOfPoints="{}" NumberOfVerts="{}" NumberOfLines="0" NumberOfStrips="0" NumberOfPolys="">\n'.format(
                number_points, number_points))
    # <PointData>
    output_file.write('      <PointData>\n')
    output_file.write(
        '        <DataArray type="Float32" Name="zvalue" NumberOfComponents="1" format="ascii" RangeMin="{}" RangeMax="{}">\n'.format(
            min_value_array[2], max_value_array[2]))
    for vertexcount in range(number_points):
        if (vertexcount % 6) == 0:
            output_file.write('          ')
        output_file.write('{}'.format(z_array[vertexcount]))
        if (vertexcount % 6) == 5:
            output_file.write('\n')
        else:
            output_file.write(' ')
    if (vertexcount % 6) != 5:
        output_file.write('\n')
    output_file.write('        </DataArray>\n')
    output_file.write('      </PointData>\n')
    # <CellData>
    output_file.write('      <CellData>\n')
    if (show_polys and not show_points):
        output_file.write(
            '        <DataArray type="Float32" Name="averaged zvalue" NumberOfComponents="1" format="ascii" RangeMin="{}" RangeMax="{}">\n'.format(
                avg_min_value, avg_max_value))
        for vertexcount in range(number_polys):
            if (vertexcount % 6) == 0:
                output_file.write('          ')
            output_file.write('{}'.format(averaged_z_value_array[vertexcount]))
            if (vertexcount % 6) == 5:
                output_file.write('\n')
            else:
                output_file.write(' ')
        if (vertexcount % 6) != 5:
            output_file.write('\n')
        output_file.write('        </DataArray>\n')
    output_file.write('      </CellData>\n')
    # <Points>
    output_file.write('      <Points>\n')
    output_file.write(
        '        <DataArray type="Float32" Name="Points" NumberOfComponents="3" format="ascii" RangeMin="{}" RangeMax="{}">\n'.format(
            min_value, max_value))
    for vertexcount in range(number_points):
        if (vertexcount % 2) == 0:
            output_file.write('          ')
        output_file.write('{} {} {}'.format(x_array[vertexcount], y_array[vertexcount], z_array[vertexcount]))
        if (vertexcount % 2) == 1:
            output_file.write('\n')
        else:
            output_file.write(' ')
    if (vertexcount % 2) != 1:
        output_file.write('\n')
    output_file.write('        </DataArray>\n')
    output_file.write('      </Points>\n')
    # <Verts>
    output_file.write('      <Verts>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(
            number_points - 1))
    if (show_points):
        for vertexcount in range(number_points):
            if (vertexcount % 6) == 0:
                output_file.write('          ')
            output_file.write('{}'.format(vertexcount))
            if (vertexcount % 6) == 5:
                output_file.write('\n')
            else:
                output_file.write(' ')
        if (vertexcount % 6) != 5:
            output_file.write('\n')
    output_file.write('        </DataArray>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(
            number_points))
    if (show_points):
        for vertexcount in range(number_points):
            if (vertexcount % 6) == 0:
                output_file.write('          ')
            output_file.write('{}'.format(vertexcount + 1))
            if (vertexcount % 6) == 5:
                output_file.write('\n')
            else:
                output_file.write(' ')
        if (vertexcount % 6) != 5:
            output_file.write('\n')
    output_file.write('        </DataArray>\n')
    output_file.write('      </Verts>\n')
    # <Lines> (always empty)
    output_file.write('      <Lines>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(
            number_polys - 1))
    output_file.write('        </DataArray>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(
            number_polys))
    output_file.write('        </DataArray>\n')
    output_file.write('      </Lines>\n')
    # <Strips> (always empty)
    output_file.write('      <Strips>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(
            number_polys - 1))
    output_file.write('        </DataArray>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(
            number_polys))
    output_file.write('        </DataArray>\n')
    output_file.write('      </Strips>\n')
    # <Polys>: one quad per grid cell.
    output_file.write('      <Polys>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(
            number_polys - 1))
    if (show_polys):
        polycount = 0
        for column_count in range(poly_size):
            stride_value = column_count * matrix_size
            for row_count in range(poly_size):
                temp_index = stride_value + row_count
                if (polycount % 2) == 0:
                    output_file.write('          ')
                output_file.write('{} {} {} {}'.format(temp_index, (temp_index + 1), (temp_index + matrix_size + 1),
                                                       (temp_index + matrix_size)))
                if (polycount % 2) == 1:
                    output_file.write('\n')
                else:
                    output_file.write(' ')
                polycount += 1
        if (polycount % 2) == 1:
            output_file.write('\n')
    output_file.write('        </DataArray>\n')
    output_file.write(
        '        <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(
            number_polys))
    if (show_polys):
        for polycount in range(number_polys):
            if (polycount % 6) == 0:
                output_file.write('          ')
            output_file.write('{}'.format((polycount + 1) * 4))
            if (polycount % 6) == 5:
                output_file.write('\n')
            else:
                output_file.write(' ')
        if (polycount % 6) != 5:
            output_file.write('\n')
    output_file.write('        </DataArray>\n')
    output_file.write('      </Polys>\n')
    output_file.write('    </Piece>\n')
    output_file.write('  </PolyData>\n')
    output_file.write('</VTKFile>\n')
    output_file.write('')
    output_file.close()
    print("Done with file:{}".format(vtp_file))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Convert h5 file to XML-based VTK file that can be opened with ParaView')
    parser.add_argument('--surf_file', '-f', default='', help='The h5 file that contains surface values')
    parser.add_argument('--surf_name', default='train_loss',
                        help='The type of surface to plot: train_loss | test_loss | train_acc | test_acc ')
    parser.add_argument('--zmax', default=-1, type=float, help='Maximum z value to map')
    parser.add_argument('--interp', default=-1, type=int,
                        help='Interpolate the surface to this resolution (1000 recommended)')
    parser.add_argument('--log', action='store_true', default=False, help='log scale')
    args = parser.parse_args()
    # NOTE(review): the call after parse_args() was truncated in the recovered
    # source; this is the conventional entry call matching the arguments
    # declared above -- confirm.
    h5_to_vtp(args.surf_file, args.surf_name, log=args.log, zmax=args.zmax, interp=args.interp)
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!
