Best Python code snippet using pandera_python
SetOps.py
Source:SetOps.py  
...
uncoerce_dtype = unflatten_dtype

def argsort(ar):
    ar = np.asanyarray(ar)
    if ar.ndim > 1:
        ar, _, _, _ = coerce_dtype(ar)
    return recast_permutation(np.argsort(ar, kind='mergesort'))

def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=0, sorting=None, minimal_dtype=False):
    """
    A variant on np.unique with default support for `axis=0` and sorting
    """
    ar = np.asanyarray(ar)
    if ar.ndim == 1:
        ret = unique1d(ar, return_index=return_index, return_inverse=return_inverse,
                       return_counts=return_counts, sorting=sorting, minimal_dtype=minimal_dtype)
        return ret
    # axis was specified and not None
    try:
        ar = np.moveaxis(ar, axis, 0)
    except np.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.AxisError(axis, ar.ndim)
    # coerce the data into the appropriate shape
    consolidated, dtype, orig_shape, orig_dtype = coerce_dtype(ar)
    output = unique1d(consolidated,
                      return_index=return_index, return_inverse=return_inverse,
                      return_counts=return_counts, sorting=sorting, minimal_dtype=minimal_dtype)
    output = (uncoerce_dtype(output[0], orig_shape, orig_dtype, axis),) + output[1:]
    return output

def unique1d(ar, return_index=False, return_inverse=False,
             return_counts=False, sorting=None, minimal_dtype=False):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar)
    if sorting is None:
        if minimal_dtype:
            sorting = recast_permutation(ar.argsort(kind='mergesort'))  # we want to have stable sorts throughout
        else:
            sorting = ar.argsort(kind='mergesort')  # we want to have stable sorts throughout
    ar = ar[sorting]
    mask = np.empty(ar.shape, dtype=np.bool_)
    mask[:1] = True
    mask[1:] = ar[1:] != ar[:-1]
    ret = (ar[mask], sorting)
    if return_index:
        ret += (sorting[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[sorting] = imask
        ret += (inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret

def intersection(ar1, ar2,
                 assume_unique=False, return_indices=False,
                 sortings=None, union_sorting=None, minimal_dtype=False
                 ):
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if ar1.dtype < ar2.dtype:
        ar1 = ar1.astype(ar2.dtype)
    elif ar2.dtype < ar1.dtype:
        ar2 = ar2.astype(ar1.dtype)
    if ar1.ndim == 1:
        ret = intersect1d(ar1, ar2, assume_unique=assume_unique, return_indices=return_indices,
                          sortings=sortings, union_sorting=union_sorting, minimal_dtype=minimal_dtype)
        return ret
    ar1, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar1)
    ar2, dtype, orig_shape2, orig_dtype2 = coerce_dtype(ar2, dtype=dtype)
    output = intersect1d(ar1, ar2, assume_unique=assume_unique, return_indices=return_indices,
                         sortings=sortings, union_sorting=union_sorting)
    output = (uncoerce_dtype(output[0], orig_shape1, orig_dtype1, None),) + output[1:]
    return output

def intersect1d(ar1, ar2,
                assume_unique=False, return_indices=False,
                sortings=None, union_sorting=None, minimal_dtype=False
                ):
    """
    Find the intersection of two arrays.
    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if not assume_unique:
        if return_indices:
            if sortings is not None:
                ar1, sorting1, ind1 = unique1d(ar1, return_index=True, sorting=sortings[0], minimal_dtype=minimal_dtype)
                ar2, sorting2, ind2 = unique1d(ar2, return_index=True, sorting=sortings[1], minimal_dtype=minimal_dtype)
            else:
                ar1, sorting1, ind1 = unique1d(ar1, return_index=True, minimal_dtype=minimal_dtype)
                ar2, sorting2, ind2 = unique1d(ar2, return_index=True, minimal_dtype=minimal_dtype)
        else:
            if sortings is not None:
                ar1, sorting1 = unique1d(ar1, sorting=sortings[0], minimal_dtype=minimal_dtype)
                ar2, sorting2 = unique1d(ar2, sorting=sortings[1], minimal_dtype=minimal_dtype)
            else:
                ar1, sorting1 = unique1d(ar1)
                ar2, sorting2 = unique1d(ar2)
        sortings = (sorting1, sorting2)
    aux = np.concatenate((ar1, ar2))
    if union_sorting is None:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        if minimal_dtype:
            aux_sort_indices = recast_permutation(aux_sort_indices)
        aux = aux[aux_sort_indices]
    else:
        aux_sort_indices = union_sorting
        aux = aux[aux_sort_indices]
    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]
    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]
        return int1d, sortings, union_sorting, ar1_indices, ar2_indices
    else:
        return int1d, sortings, union_sorting

def contained(ar1, ar2, assume_unique=False, invert=False,
              sortings=None, union_sorting=None, method=None):
    """
    Test whether each element of `ar1` is also present in `ar2`.
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if ar1.dtype < ar2.dtype:
        ar1 = ar1.astype(ar2.dtype)
    elif ar2.dtype < ar1.dtype:
        ar2 = ar2.astype(ar1.dtype)
    if ar1.ndim > 1:
        ar1, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar1)
        ar2, dtype, orig_shape2, orig_dtype2 = coerce_dtype(ar2, dtype=dtype)
    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask, sortings, union_sorting
    # Otherwise use sorting
    if assume_unique is not True and assume_unique is not False:  # i.e. a pair of flags rather than a single bool
        assume_unique_1, assume_unique_2 = assume_unique
    else:
        assume_unique_1 = assume_unique_2 = assume_unique
    if not assume_unique_1:
        if sortings is None:
            ar1, sorting1, rev_idx = unique1d(ar1, return_inverse=True)
        else:
            ar1, sorting1, rev_idx = unique1d(ar1, sorting=sortings[0], return_inverse=True)
    else:
        if sortings is not None:
            sorting1 = sortings[0]
        else:
            sorting1 = None
    if not assume_unique_2:
        if sortings is None:
            ar2, sorting2 = unique1d(ar2)
        else:
            ar2, sorting2 = unique1d(ar2, sorting=sortings[1])
    else:
        if sortings is not None:
            sorting2 = sortings[1]
        else:
            sorting2 = None
    sortings = (sorting1, sorting2)
    if method is not None and method == 'find':
        find_pos, _ = find(ar2, ar1, sorting='sorted', check=False)  # binary search is fast
        if invert:
            ret = ar2[find_pos] != ar1
        else:
            ret = ar2[find_pos] == ar1
        order = None
    else:
        ar = np.concatenate((ar1, ar2))
        # We need this to be a stable sort, so always use 'mergesort'
        # here. The values from the first array should always come before
        # the values from the second array.
        if union_sorting is None:
            order = ar.argsort(kind='mergesort')
        else:
            order = union_sorting
        sar = ar[order]
        if invert:
            bool_ar = (sar[1:] != sar[:-1])
        else:
            bool_ar = (sar[1:] == sar[:-1])
        flag = np.concatenate((bool_ar, [invert]))
        ret = np.empty(ar.shape, dtype=bool)
        ret[order] = flag
    if assume_unique_1:
        return ret[:len(ar1)], sortings, order
    else:
        return ret[rev_idx], sortings, order

def difference(ar1, ar2, assume_unique=False, sortings=None, method=None, union_sorting=None):
    """
    Calculates set differences over any shape of array
    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if ar1.dtype < ar2.dtype:
        ar1 = ar1.astype(ar2.dtype)
    elif ar2.dtype < ar1.dtype:
        ar2 = ar2.astype(ar1.dtype)
    if ar1.ndim == 1:
        ret = difference1d(ar1, ar2, assume_unique=assume_unique, method=method,
                           sortings=sortings, union_sorting=union_sorting)
        return ret
    ar1, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar1)
    ar2, dtype, orig_shape2, orig_dtype2 = coerce_dtype(ar2, dtype=dtype)
    output = difference1d(ar1, ar2, assume_unique=assume_unique, method=method,
                          sortings=sortings, union_sorting=union_sorting)
    output = (uncoerce_dtype(output[0], orig_shape1, orig_dtype1, None),) + output[1:]
    return output

def difference1d(ar1, ar2, assume_unique=False, sortings=None, method=None, union_sorting=None):
    """
    Calculates set differences in 1D
    """
    if not assume_unique:
        if sortings is not None:
            ar1, sorting1 = unique(ar1, sorting=sortings[0])
            ar2, sorting2 = unique(ar2, sorting=sortings[1])
        else:
            ar1, sorting1 = unique(ar1)
            ar2, sorting2 = unique(ar2)
        sortings = (sorting1, sorting2)
    in_spec = contained(ar1, ar2, sortings=sortings, union_sorting=union_sorting, assume_unique=True,
                        method=method, invert=True)
    return (ar1[in_spec[0]],) + in_spec[1:]

def find1d(ar, to_find, sorting=None,
           search_space_sorting=None, return_search_space_sorting=False,
           check=True, minimal_dtype=False, missing_val='raise'
           ):
    """
    Finds elements in an array and returns sorting
    """
    presorted = isinstance(sorting, str) and sorting == 'sorted'
    if sorting is None:
        sorting = np.argsort(ar, kind='mergesort')
    if search_space_sorting is None and return_search_space_sorting:
        search_space_sorting = np.argsort(to_find, kind='mergesort')
    if search_space_sorting is not None:
        if isinstance(search_space_sorting, np.ndarray):
            search_space_inverse_sorting = np.argsort(search_space_sorting)
        else:
            search_space_sorting, search_space_inverse_sorting = search_space_sorting
        to_find = to_find[search_space_sorting]
    if presorted:
        vals = np.searchsorted(ar, to_find)
    else:
        vals = np.searchsorted(ar, to_find, sorter=sorting)
    if isinstance(vals, (np.integer, int)):
        vals = np.array([vals])
    # we have the ordering according to the _sorted_ version of `ar`
    # so now we need to invert that back to the unsorted version
    if len(sorting) > 0:
        big_vals = vals == len(ar)
        vals[big_vals] = -1
        if not presorted:
            vals = sorting[vals]
        if check:
            # now because of how searchsorted works, we need to check if the found values
            # truly agree with what we asked for
            bad_vals = ar[vals] != to_find
            if vals.shape == ():
                if bad_vals:
                    vals = -1
            else:
                # print(vals, bad_vals)
                vals[bad_vals] = -1
        else:
            bad_vals = big_vals
    else:
        bad_vals = np.full_like(to_find, True)
        vals = np.full_like(vals, -1)
    if check and bad_vals.any():
        if isinstance(missing_val, str) and missing_val == 'raise':
            raise IndexError("{} not in array".format(to_find[bad_vals]))
        else:
            vals[bad_vals] = missing_val
    if minimal_dtype and not bad_vals.any():  # protecting the missings
        vals = downcast_index_array(vals, ar.shape[-1])
    if search_space_sorting is not None:
        vals = vals[search_space_inverse_sorting]
    ret = (vals, sorting,)
    if return_search_space_sorting:
        ret += ((search_space_sorting, search_space_inverse_sorting),)
    return ret

def find(ar, to_find, sorting=None,
         search_space_sorting=None,
         return_search_space_sorting=False,
         check=True, minimal_dtype=False, missing_val='raise'):
    """
    Finds elements in an array and returns sorting
    """
    ar = np.asanyarray(ar)
    to_find = np.asanyarray(to_find)
    if ar.dtype < to_find.dtype:
        ar = ar.astype(to_find.dtype)
    elif to_find.dtype < ar.dtype:
        to_find = to_find.astype(ar.dtype)
    # print(ar.dtype, to_find.dtype)
    if ar.ndim == 1:
        ret = find1d(ar, to_find, sorting=sorting, check=check,
                     search_space_sorting=search_space_sorting,
                     return_search_space_sorting=return_search_space_sorting,
                     minimal_dtype=minimal_dtype, missing_val=missing_val
                     )
        return ret
    ar, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar)
    to_find, dtype, orig_shape2, orig_dtype2 = coerce_dtype(to_find, dtype=dtype)
    output = find1d(ar, to_find, sorting=sorting, check=check,
                    search_space_sorting=search_space_sorting,
                    return_search_space_sorting=return_search_space_sorting,
                    minimal_dtype=minimal_dtype, missing_val=missing_val
                    )
    return output

def group_by1d(ar, keys, sorting=None, return_indices=False):
    """
    Splits an array by a set of keys
    :param ar:
    :type ar:
    :param keys:
    :type keys:
    :param sorting:
    :type sorting:
    :return:
    :rtype:
    """
    uinds, sorting, mask = unique(keys, sorting=sorting, return_inverse=True)
    _, _, inds = unique(mask[sorting], sorting=np.arange(len(mask)), return_index=True)
    groups = np.split(ar[sorting,], inds)[1:]
    ret = ((uinds, groups), sorting)
    if return_indices:
        ret += (inds,)
    return ret

def group_by(ar, keys, sorting=None, return_indices=False):
    """
    Groups an array by keys
    :param ar:
    :type ar:
    :param keys:
    :type keys:
    :param sorting:
    :type sorting:
    :return: group pairs & sorting info
    :rtype:
    """
    ar = np.asanyarray(ar)
    keys = np.asanyarray(keys)
    if keys.ndim == 1:
        ret = group_by1d(ar, keys, sorting=sorting, return_indices=return_indices)
        return ret
    keys, dtype, orig_shape, orig_dtype = coerce_dtype(keys)
    output = group_by1d(ar, keys, sorting=sorting, return_indices=return_indices)
    ukeys, groups = output[0]
    ukeys = uncoerce_dtype(ukeys, orig_shape, orig_dtype, None)
    output = ((ukeys, groups),) + output[1:]
    return output

def split_by_regions1d(ar, regions, sortings=None, return_indices=False):
    """
    :param regions:
    :type regions:
    :param ar1:
    :type ar1:
    :return:
    :rtype:
    """
    if sortings is None:
        sortings = (None, None)
    ar_sorting, region_sorting = sortings
    if ar_sorting is None:
        ar_sorting = argsort(ar)
    ar = ar[ar_sorting]
    if region_sorting is None:
        region_sorting = argsort(regions)
    insertion_spots = np.searchsorted(regions, ar, sorter=region_sorting)
    uinds, _, inds = unique(insertion_spots, sorting=np.arange(len(insertion_spots)), return_index=True)
    groups = np.split(ar, inds)[1:]
    output = (uinds, groups)
    if return_indices:
        return output, inds, sortings
    else:
        return output, sortings

def split_by_regions(ar, regions, sortings=None, return_indices=False):
    """
    Splits an array up by edges defined by regions.
    Operates in 1D but can take compound dtypes using lexicographic
    ordering.
    In that case it is on the user to ensure that lex ordering is what is desired.
    :param ar:
    :type ar:
    :param regions:
    :type regions:
    :param sortings:
    :type sortings:
    :return:
    :rtype:
    """
    ar = np.asanyarray(ar)
    regions = np.asanyarray(regions)
    if ar.ndim == 1:
        ret = split_by_regions1d(regions, ar, sortings=sortings, return_indices=return_indices)
        return ret
    ar, dtype, orig_shape, orig_dtype = coerce_dtype(ar)
    regions, dtype, orig_shape1, orig_dtype1 = coerce_dtype(regions, dtype=dtype)
    output = split_by_regions1d(regions, ar, sortings=sortings, return_indices=return_indices)
    uinds, groups = output[0]
    groups = uncoerce_dtype(groups, orig_shape, orig_dtype, None)
    output = ((uinds, groups),) + output[1:]
...
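The file above leans on two ideas: rows of a multidimensional array are collapsed into single comparable elements (coerce_dtype / uncoerce_dtype) so the 1D routines can serve axis=0 operations, and every routine uses a stable mergesort plus an adjacent-equality mask so the resulting sort orders can be passed back in and reused. The snippet below is a minimal, self-contained NumPy sketch of that pattern; it does not call the module itself, and the sample data is invented.

    import numpy as np

    keys = np.array([[0, 1], [1, 0], [0, 1], [2, 2]])

    # stable lexicographic sort of the rows (the last key passed to lexsort is primary)
    order = np.lexsort(keys.T[::-1])
    srt = keys[order]

    # adjacent-difference mask marks the first occurrence of each distinct row
    mask = np.empty(len(keys), dtype=bool)
    mask[:1] = True
    mask[1:] = np.any(srt[1:] != srt[:-1], axis=1)

    unique_rows = srt[mask]
    inverse = np.empty(len(keys), dtype=np.intp)
    inverse[order] = np.cumsum(mask) - 1   # maps every original row to its unique-row id

    print(unique_rows)   # [[0 1] [1 0] [2 2]]
    print(inverse)       # [0 1 0 2]

Because the sort is stable and returned alongside the values, the same order array can be fed back into later calls (the sorting / sortings parameters above) instead of sorting again.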
uber_model.py
Source:uber_model.py
import importlib
import pandas as pd
from pandas import compat
from .parser import Parser
import logging

class UberModel(object):
    """
    Collection of static methods used across all the ubertool models.
    """
    def __init__(self):
        """Main utility class for building Ubertool model classes for model execution."""
        super(UberModel, self).__init__()
        self.name = self.__class__.__name__
        self.pd_obj = None
        self.pd_obj_exp = None
        self.pd_obj_out = None

    def validate_input_names(self, model_inputs, user_inputs):
        """
        Compare the user supplied inputs with the ModelInputs() class attributes, ensuring they match by name
        :param model_inputs: ModelInputs() class instance
        :return: Boolean
        """
        # Create temporary DataFrame where each column name is the same as ModelInputs attributes
        df = pd.DataFrame()
        for input_param in model_inputs.__dict__:
            df[input_param] = getattr(self, input_param)
        keys_a = set(df.keys())
        keys_b = set(self.pd_obj.keys())
        extras = keys_b - keys_a
        n_extras = len(extras)
        print(f'There are {n_extras} extra keys.')
        if n_extras > 0: print(extras)
        missing = keys_a - keys_b
        n_missing = len(missing)
        print(f'There are {n_missing} missing keys.')
        if n_missing > 0: print(missing)
        # Compare column names of temporary DataFrame (created above) to user-supplied DataFrame from JSON
        # if df.columns.sort_values().equals(user_inputs.columns.sort_values()):
        if n_extras >= 0 and n_missing == 0:
            print('Input parameters match what is expected.')
            print(set(df.keys()))
            return True
        else:
            print('Input parameters do not have all required inputs.')
            msg_err1 = "Input parameters do not have all required inputs. Please see API documentation.\n"
            msg_err2 = "Expected: \n{}\n".format(df.columns.sort_values())
            msg_err3 = "Received: \n{}\n".format(self.pd_obj.columns.sort_values())
            missing = [item for item in keys_a if item not in keys_b]
            msg_missing = "missing the following field(s): \n{}\n".format(missing)
            extras = [item for item in keys_b if item not in keys_a]
            msg_extras = "the following extra field(s) were found: \n{}\n".format(extras)
            print(msg_err1 + msg_err2 + msg_err3 + msg_missing + msg_extras)
            raise ValueError(msg_err1 + msg_err2 + msg_err3 + msg_missing + msg_extras)

    def coerce_input_dtype(self, incoming_dtype, coerce_dtype, input_series):
        # logging.info(incoming_dtype)
        if coerce_dtype == 'object':
            return input_series.astype('object')
        elif coerce_dtype == 'float64':
            if incoming_dtype == 'object':
                # coerces strings to np.nans
                return pd.to_numeric(input_series, errors='coerce')
            elif incoming_dtype == 'float64':
                return input_series
            else:
                return input_series.astype('float64')
        elif coerce_dtype in ('int64', 'int32'):
            if incoming_dtype == 'object':
                # coerces strings to np.nans
                return pd.to_numeric(input_series, errors='coerce', downcast='integer')
            else:
                return input_series.astype('int64')
        else:
            print("dtype of {} is {}\n"
                  "This format is not handled by UberModel.coerce_input_dtype()".format(input_series.name, coerce_dtype))
            return input_series

    @staticmethod
    def convert_index(df_in):
        """ Attempt to convert indices of input DataFrame to duck typed dtype """
        parser = Parser(df_in)
        df = parser.convert_axes()
        return df

    def populate_inputs(self, df_in):
        """
        Validate and assign user-provided model inputs to their respective class attributes
        :param df_in: Pandas DataFrame object of model input parameters
        """
        df_user = self.convert_index(df_in)
        # mod_name = self.name.lower() + '.' + self.name.lower() + '_exe'
        mod_name = "pram_flask.ubertool.ubertool." + self.name.lower() + "." + self.name.lower() + '_exe'
        # print(mod_name)
        try:
            # Import the model's input class (e.g. TedInputs) to compare user supplied inputs to
            module = importlib.import_module(mod_name)
            model_inputs_class = getattr(module, self.name + "Inputs")
            model_inputs = model_inputs_class()
        except ValueError as err:
            logging.info(mod_name)
            logging.info(err.args)
        try:
            if self.validate_input_names(model_inputs, df_user):
                # If the user-supplied DataFrame has the same column names as required by ModelInputs...
                # set each Series in the DataFrame to the corresponding ModelInputs attribute (member variable)
                # user_inputs_df = self._sanitize(df)
                for column in df_user.columns:
                    coerce_dtype = str(getattr(model_inputs, column).dtype)
                    df_series = df_user[column]
                    initial_dtype = str(df_series.dtype)
                    # if initial_dtype != coerce_dtype:
                    #     logging.info('var:' + column + ' coerce to: ' + coerce_dtype + ' from: ' + initial_dtype)
                    setattr(self, column, self.coerce_input_dtype(initial_dtype, coerce_dtype, df_series))
        except ValueError as err:
            logging.info('input validation problem')
            logging.info(err.args)

    def populate_outputs(self):
        # Create temporary DataFrame where each column name is the same as *ModelName*Outputs attributes
        """
        Create and return Model Output DataFrame where each column name is a model output parameter
        :param model: string, name of the model as referred to in class names (e.g. terrplant, sip, stir, etc..)
        :param model_obj: class instance, instance of the model class for which the
        :return:
        """
        # Import the model's output class (e.g. TerrplantOutputs) to create a DF to store the model outputs in
        mod_name = self.name.lower() + '.' + self.name.lower() + '_exe'
        # mod_name = "ubertool_ecorest.ubertool.ubertool." + self.name.lower() + "." + self.name.lower() + '_exe'
        module = importlib.import_module(mod_name)
        model_outputs = getattr(module, self.name + "Outputs")
        model_outputs_obj = model_outputs()
        df = pd.DataFrame()
        for input_param in model_outputs_obj.__dict__:
            df[input_param] = getattr(self, input_param)
            setattr(self, input_param, df[input_param])
        return df

    def fill_output_dataframe(self):
        """ Combine all output properties into Pandas Dataframe """
        for column in self.pd_obj_out.columns:
            try:
                output = getattr(self, column)
                # print(output)
                if isinstance(output, pd.Series):
                    # Ensure model output is a Pandas Series. Only Series can be
                    # reliably put into a Pandas DataFrame.
                    self.pd_obj_out[column] = output
                else:
                    print('"{}" is not a Pandas Series. Returned outputs must be a Pandas Series'.format(column))
            except:
                print("output dataframe error on " + column)
        # print('output dataframe')
        # print(self.pd_obj_out)
        return

    def get_dict_rep(self):
        """
        Convert DataFrames to dictionary, returning a tuple (inputs, outputs, exp_out)
        :param model_obj: model instance
        :return: (dict(input DataFrame), dict(outputs DataFrame), dict(expected outputs DataFrame))
        """
        name = self.name
        if self.name.lower() == "varroapop":
            try:
                return self.to_dict(self.pd_obj), \
                       self.pd_obj_out.to_dict('list'), \
                       self.pd_obj_exp.to_dict('list')
            except AttributeError:
                return self.to_dict(self.pd_obj), \
                       self.pd_obj_out.to_dict('list'), \
                       {}
        elif self.name.lower() == "sam":
            try:
                return self.to_dict(self.pd_obj), \
                       self.pd_obj_out, \
                       self.pd_obj_exp
            except AttributeError as ex:
                return self.to_dict(self.pd_obj), \
                       {"error": str(ex)}, \
                       {}
        try:
            return self.to_dict(self.pd_obj), \
                   self.to_dict(self.pd_obj_out), \
                   self.to_dict(self.pd_obj_exp)
        except AttributeError:
            return self.to_dict(self.pd_obj), \
                   self.to_dict(self.pd_obj_out), \
                   {}

    @staticmethod
    def to_dict(df):
        """
        This is an override of the pd.DataFrame.to_dict() method where the keys in the
        returned dictionary are cast to strings. This fixes an error where duck typing would
        sometimes allow non-String keys, which fails when Flask serializes the dictionary to
        a JSON string to return the HTTP response.
        Original method returns: dict((str(k), v.to_dict()) for k, v in compat.iteritems(df))
        :param df:
        :return:
        """
        out = {}
        for k, v in df.items():
            col = k
            out[col] = {}
            for row, value in v.items():
                out[col][str(row)] = value
        return out

class ModelSharedInputs(object):
    def __init__(self):
        """
        Container for the shared model inputs amongst most models (e.g. version, chemical name, & PC Code)
        """
        super(ModelSharedInputs, self).__init__()
        self.csrfmiddlewaretoken = pd.Series([], dtype="object")
        self.version = pd.Series([], dtype="object")
        self.chemical_name = pd.Series([], dtype="object")
...
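coerce_input_dtype() above boils down to one pandas pattern: a column that should be numeric but arrives from JSON as object dtype goes through pd.to_numeric(errors='coerce'), so unparseable strings become NaN instead of raising, while genuinely textual columns are simply cast to object. A minimal standalone sketch of that behavior follows; the column names are made up for illustration and do not come from any particular ubertool model.

    import pandas as pd

    raw = pd.DataFrame({
        "application_rate": ["1.5", "2.0", "oops"],   # numeric field arriving as strings (object dtype)
        "chemical_name": ["foo", "bar", "baz"],       # genuinely textual field
    })

    # numeric target dtype: unparseable strings become NaN instead of raising
    application_rate = pd.to_numeric(raw["application_rate"], errors="coerce")

    # object target dtype: left alone apart from an explicit cast
    chemical_name = raw["chemical_name"].astype("object")

    print(application_rate.dtype, application_rate.isna().sum())   # float64 1
    print(chemical_name.dtype)                                      # object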
bwypy.py
Source:bwypy.py
try:
    import ujson as jsonlib
except:
    import json as jsonlib
import pandas as pd
import logging
import time
import urllib

class Bwypy:
    default = {"database": "", "search_limits": [],
               "words_collation": "Case_Sensitive", "compare_limits": [],
               "method": "return_json",
               "counttype": ["TextCount", "WordCount"], "groups": []}

    def __init__(self, endpoint, db):
        self.endpoint = endpoint
        self.default['database'] = db
        self._fields = None

    @property
    def fields(self):
        '''
        Return Pandas object with all the fields in a Bookworm
        '''
        if self._fields is None:
            q = {'database': self.default['database'],
                 'method': 'returnPossibleFields'}
            obj = self._fetch(q)
            df = pd.DataFrame(obj)
            self._fields = df
        return self._fields

    def stats(self):
        q = self.default.copy()
        # Let's hope nobody creates a bookworm on the history of the universe:
        q['search_limits'] = [{"date_year": {"$lte": 10000}}]
        return self.search(q)

    def search(self, query, coerce_dtype=True):
        logging.debug(query)
        response = self._fetch(query, type="json")[0]
        rows = self._expand(response, query['groups'], query['counttype'])
        '''
        # Until Pandas supports compound dtype statements, this type coercion
        # is pointless, so using convert_objects instead.
        # Watch https://github.com/pydata/pandas/issues/4464
        if coerce_dtype:
            # Get expected datatypes from DB
            fields = bw.fields
            # Key for sql dtypes => Pandas
            fieldkey = { "integer": "int64", "character": "string" }
            counttype_dtypes = { "TextCount": "uint64",
                                 "WordCount": "uint64",
                                 "WordsPerMillion": "float32",
                                 "TextPercent": "float16" }
            db_group_dtypes = [fields[fields.dbname == gname]['type'].iloc[0]
                               for gname in q['groups']]
            count_dtypes = [counttype_dtypes[fieldname]
                            for fieldname in q['counttype']]
            dtypes = [fieldkey[db_dtype] for db_dtype in db_group_dtypes] +
                    count_dtypes
            df = pd.DataFrame(rows, dtype)
        else:
            df = pd.DataFrame(rows)
        '''
        df = pd.DataFrame(rows)
        if coerce_dtype:
            ''' Copying objects. Hmmm... '''
            df = df.convert_objects(convert_numeric=True)
        if (len(query['groups'])) > 0:
            df.set_index(query['groups'], inplace=True)
        return df

    def _fetch(self, query, type="pandas"):
        ''' Get results from a bookworm Server
            This method calls JSON and converts to Pandas, rather than using
            Bookworm's built-in DataFrame return method, as JSON is a more
            transparent and safer format for data interchange.
        '''
        start = time.time()
        # Regardless of how the query is provided, coerce to 'return_json'
        # query['method'] = 'return_json' # BREAKS CALLS TO self.fields()
        qurl = "%s?queryTerms=%s" % (self.endpoint, jsonlib.dumps(query))
        try:
            f = urllib.urlopen(qurl)
            response = jsonlib.loads(f.read())
        except:
            # Python 3, being lazy here
            import requests
            r = requests.get(qurl, verify=False)
            response = r.json()
        if type == "pandas":
            response = pd.DataFrame(response)
        elif type == "json":
            pass
        logging.debug("Query time: %ds" % (time.time()-start))
        return response

    def _expand(self, o, grouplist, counttypes, collector=[]):
        '''
        A recursive method for exploding results into rows, one line per set of
        facets
        '''
        new_coll = []
        if len(grouplist) == 0:
            l = []
            for i, val in enumerate(o):
                counttype = counttypes[i]
                l += [(counttype, val)]
            return [dict(collector + l)]
        else:
            l = []
            for k, v in o.items():
                item = (grouplist[0], k)
                new_coll = collector + [item]
                l += self._expand(v, grouplist[1:], counttypes, new_coll)
...
