How to use the coerce_dtype method in pandera

Best Python code snippets using pandera
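In pandera itself, dtype coercion is normally requested at the schema level: passing coerce=True makes validation cast each column to its declared dtype (this is where the schema's coerce_dtype step runs internally). Below is a minimal sketch, assuming pandera and pandas are installed; the column names and data are made up for illustration.

import pandas as pd
import pandera as pa

# A schema with coerce=True asks pandera to cast each column to the
# declared dtype during validation, before checks run.
schema = pa.DataFrameSchema(
    {
        "price": pa.Column(float, pa.Check.ge(0), coerce=True),
        "quantity": pa.Column(int, coerce=True),
    }
)

raw = pd.DataFrame({"price": ["1.5", "2.0"], "quantity": ["3", "4"]})  # string columns
validated = schema.validate(raw)  # columns come back as float64 / int64
print(validated.dtypes)

The snippets below come from projects that define their own coerce_dtype helpers; they illustrate the same idea of casting data to a target dtype before operating on it.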

SetOps.py

Source: SetOps.py (GitHub)

...
uncoerce_dtype = unflatten_dtype

def argsort(ar):
    ar = np.asanyarray(ar)
    if ar.ndim > 1:
        ar, _, _, _ = coerce_dtype(ar)
    return recast_permutation(np.argsort(ar, kind='mergesort'))

def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=0, sorting=None, minimal_dtype=False):
    """
    A variant on np.unique with default support for `axis=0` and sorting
    """
    ar = np.asanyarray(ar)
    if ar.ndim == 1:
        ret = unique1d(ar, return_index=return_index, return_inverse=return_inverse,
                       return_counts=return_counts, sorting=sorting, minimal_dtype=minimal_dtype)
        return ret
    # axis was specified and not None
    try:
        ar = np.moveaxis(ar, axis, 0)
    except np.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.AxisError(axis, ar.ndim)
    # coerce the data into the appropriate shape
    consolidated, dtype, orig_shape, orig_dtype = coerce_dtype(ar)
    output = unique1d(consolidated,
                      return_index=return_index, return_inverse=return_inverse,
                      return_counts=return_counts, sorting=sorting, minimal_dtype=minimal_dtype)
    output = (uncoerce_dtype(output[0], orig_shape, orig_dtype, axis),) + output[1:]
    return output

def unique1d(ar, return_index=False, return_inverse=False,
             return_counts=False, sorting=None, minimal_dtype=False):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar)
    if sorting is None:
        if minimal_dtype:
            sorting = recast_permutation(ar.argsort(kind='mergesort'))  # we want stable sorts throughout
        else:
            sorting = ar.argsort(kind='mergesort')  # we want stable sorts throughout
    ar = ar[sorting]
    mask = np.empty(ar.shape, dtype=np.bool_)
    mask[:1] = True
    mask[1:] = ar[1:] != ar[:-1]
    ret = (ar[mask], sorting)
    if return_index:
        ret += (sorting[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[sorting] = imask
        ret += (inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret

def intersection(ar1, ar2,
                 assume_unique=False, return_indices=False,
                 sortings=None, union_sorting=None, minimal_dtype=False
                 ):
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if ar1.dtype < ar2.dtype:
        ar1 = ar1.astype(ar2.dtype)
    elif ar2.dtype < ar1.dtype:
        ar2 = ar2.astype(ar1.dtype)
    if ar1.ndim == 1:
        ret = intersect1d(ar1, ar2, assume_unique=assume_unique, return_indices=return_indices,
                          sortings=sortings, union_sorting=union_sorting, minimal_dtype=minimal_dtype)
        return ret
    ar1, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar1)
    ar2, dtype, orig_shape2, orig_dtype2 = coerce_dtype(ar2, dtype=dtype)
    output = intersect1d(ar1, ar2, assume_unique=assume_unique, return_indices=return_indices,
                         sortings=sortings, union_sorting=union_sorting)
    output = (uncoerce_dtype(output[0], orig_shape1, orig_dtype1, None),) + output[1:]
    return output

def intersect1d(ar1, ar2,
                assume_unique=False, return_indices=False,
                sortings=None, union_sorting=None, minimal_dtype=False
                ):
    """
    Find the intersection of two arrays.
    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if not assume_unique:
        if return_indices:
            if sortings is not None:
                ar1, sorting1, ind1 = unique1d(ar1, return_index=True, sorting=sortings[0], minimal_dtype=minimal_dtype)
                ar2, sorting2, ind2 = unique1d(ar2, return_index=True, sorting=sortings[1], minimal_dtype=minimal_dtype)
            else:
                ar1, sorting1, ind1 = unique1d(ar1, return_index=True, minimal_dtype=minimal_dtype)
                ar2, sorting2, ind2 = unique1d(ar2, return_index=True, minimal_dtype=minimal_dtype)
        else:
            if sortings is not None:
                ar1, sorting1 = unique1d(ar1, sorting=sortings[0], minimal_dtype=minimal_dtype)
                ar2, sorting2 = unique1d(ar2, sorting=sortings[1], minimal_dtype=minimal_dtype)
            else:
                ar1, sorting1 = unique1d(ar1)
                ar2, sorting2 = unique1d(ar2)
        sortings = (sorting1, sorting2)
    aux = np.concatenate((ar1, ar2))
    if union_sorting is None:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        if minimal_dtype:
            aux_sort_indices = recast_permutation(aux_sort_indices)
        aux = aux[aux_sort_indices]
    else:
        aux_sort_indices = union_sorting
        aux = aux[aux_sort_indices]
    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]
    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]
        return int1d, sortings, union_sorting, ar1_indices, ar2_indices
    else:
        return int1d, sortings, union_sorting

def contained(ar1, ar2, assume_unique=False, invert=False,
              sortings=None, union_sorting=None, method=None):
    """
    Test whether each element of `ar1` is also present in `ar2`.
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if ar1.dtype < ar2.dtype:
        ar1 = ar1.astype(ar2.dtype)
    elif ar2.dtype < ar1.dtype:
        ar2 = ar2.astype(ar1.dtype)
    if ar1.ndim > 1:
        ar1, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar1)
        ar2, dtype, orig_shape2, orig_dtype2 = coerce_dtype(ar2, dtype=dtype)
    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask, sortings, union_sorting
    # Otherwise use sorting
    if assume_unique is not True and assume_unique is not False:  # i.e. it's a pair of flags, not a single bool
        assume_unique_1, assume_unique_2 = assume_unique
    else:
        assume_unique_1 = assume_unique_2 = assume_unique
    if not assume_unique_1:
        if sortings is None:
            ar1, sorting1, rev_idx = unique1d(ar1, return_inverse=True)
        else:
            ar1, sorting1, rev_idx = unique1d(ar1, sorting=sortings[0], return_inverse=True)
    else:
        if sortings is not None:
            sorting1 = sortings[0]
        else:
            sorting1 = None
    if not assume_unique_2:
        if sortings is None:
            ar2, sorting2 = unique1d(ar2)
        else:
            ar2, sorting2 = unique1d(ar2, sorting=sortings[1])
    else:
        if sortings is not None:
            sorting2 = sortings[1]
        else:
            sorting2 = None
    sortings = (sorting1, sorting2)
    if method is not None and method == 'find':
        find_pos, _ = find(ar2, ar1, sorting='sorted', check=False)  # binary search is fast
        if invert:
            ret = ar2[find_pos] != ar1
        else:
            ret = ar2[find_pos] == ar1
        order = None
    else:
        ar = np.concatenate((ar1, ar2))
        # We need this to be a stable sort, so always use 'mergesort'
        # here. The values from the first array should always come before
        # the values from the second array.
        if union_sorting is None:
            order = ar.argsort(kind='mergesort')
        else:
            order = union_sorting
        sar = ar[order]
        if invert:
            bool_ar = (sar[1:] != sar[:-1])
        else:
            bool_ar = (sar[1:] == sar[:-1])
        flag = np.concatenate((bool_ar, [invert]))
        ret = np.empty(ar.shape, dtype=bool)
        ret[order] = flag
    if assume_unique_1:
        return ret[:len(ar1)], sortings, order
    else:
        return ret[rev_idx], sortings, order

def difference(ar1, ar2, assume_unique=False, sortings=None, method=None, union_sorting=None):
    """
    Calculates set differences over any shape of array
    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)
    if ar1.dtype < ar2.dtype:
        ar1 = ar1.astype(ar2.dtype)
    elif ar2.dtype < ar1.dtype:
        ar2 = ar2.astype(ar1.dtype)
    if ar1.ndim == 1:
        ret = difference1d(ar1, ar2, assume_unique=assume_unique, method=method,
                           sortings=sortings, union_sorting=union_sorting)
        return ret
    ar1, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar1)
    ar2, dtype, orig_shape2, orig_dtype2 = coerce_dtype(ar2, dtype=dtype)
    output = difference1d(ar1, ar2, assume_unique=assume_unique, method=method,
                          sortings=sortings, union_sorting=union_sorting)
    output = (uncoerce_dtype(output[0], orig_shape1, orig_dtype1, None),) + output[1:]
    return output

def difference1d(ar1, ar2, assume_unique=False, sortings=None, method=None, union_sorting=None):
    """
    Calculates set differences in 1D
    """
    if not assume_unique:
        if sortings is not None:
            ar1, sorting1 = unique(ar1, sorting=sortings[0])
            ar2, sorting2 = unique(ar2, sorting=sortings[1])
        else:
            ar1, sorting1 = unique(ar1)
            ar2, sorting2 = unique(ar2)
        sortings = (sorting1, sorting2)
    in_spec = contained(ar1, ar2, sortings=sortings, union_sorting=union_sorting, assume_unique=True, method=method, invert=True)
    return (ar1[in_spec[0]],) + in_spec[1:]

def find1d(ar, to_find, sorting=None,
           search_space_sorting=None, return_search_space_sorting=False,
           check=True, minimal_dtype=False, missing_val='raise'
           ):
    """
    Finds elements in an array and returns sorting
    """
    presorted = isinstance(sorting, str) and sorting == 'sorted'
    if sorting is None:
        sorting = np.argsort(ar, kind='mergesort')
    if search_space_sorting is None and return_search_space_sorting:
        search_space_sorting = np.argsort(to_find, kind='mergesort')
    if search_space_sorting is not None:
        if isinstance(search_space_sorting, np.ndarray):
            search_space_inverse_sorting = np.argsort(search_space_sorting)
        else:
            search_space_sorting, search_space_inverse_sorting = search_space_sorting
        to_find = to_find[search_space_sorting]
    if presorted:
        vals = np.searchsorted(ar, to_find)
    else:
        vals = np.searchsorted(ar, to_find, sorter=sorting)
    if isinstance(vals, (np.integer, int)):
        vals = np.array([vals])
    # we have the ordering according to the _sorted_ version of `ar`
    # so now we need to invert that back to the unsorted version
    if len(sorting) > 0:
        big_vals = vals == len(ar)
        vals[big_vals] = -1
        if not presorted:
            vals = sorting[vals]
        if check:
            # now because of how searchsorted works, we need to check if the found values
            # truly agree with what we asked for
            bad_vals = ar[vals] != to_find
            if vals.shape == ():
                if bad_vals:
                    vals = -1
            else:
                vals[bad_vals] = -1
        else:
            bad_vals = big_vals
    else:
        bad_vals = np.full_like(to_find, True)
        vals = np.full_like(vals, -1)
    if check and bad_vals.any():
        if isinstance(missing_val, str) and missing_val == 'raise':
            raise IndexError("{} not in array".format(to_find[bad_vals]))
        else:
            vals[bad_vals] = missing_val
    if minimal_dtype and not bad_vals.any():  # protecting the missings
        vals = downcast_index_array(vals, ar.shape[-1])
    if search_space_sorting is not None:
        vals = vals[search_space_inverse_sorting]
    ret = (vals, sorting,)
    if return_search_space_sorting:
        ret += ((search_space_sorting, search_space_inverse_sorting),)
    return ret

def find(ar, to_find, sorting=None,
         search_space_sorting=None,
         return_search_space_sorting=False,
         check=True, minimal_dtype=False, missing_val='raise'):
    """
    Finds elements in an array and returns sorting
    """
    ar = np.asanyarray(ar)
    to_find = np.asanyarray(to_find)
    if ar.dtype < to_find.dtype:
        ar = ar.astype(to_find.dtype)
    elif to_find.dtype < ar.dtype:
        to_find = to_find.astype(ar.dtype)
    if ar.ndim == 1:
        ret = find1d(ar, to_find, sorting=sorting, check=check,
                     search_space_sorting=search_space_sorting,
                     return_search_space_sorting=return_search_space_sorting,
                     minimal_dtype=minimal_dtype, missing_val=missing_val
                     )
        return ret
    ar, dtype, orig_shape1, orig_dtype1 = coerce_dtype(ar)
    to_find, dtype, orig_shape2, orig_dtype2 = coerce_dtype(to_find, dtype=dtype)
    output = find1d(ar, to_find, sorting=sorting, check=check,
                    search_space_sorting=search_space_sorting,
                    return_search_space_sorting=return_search_space_sorting,
                    minimal_dtype=minimal_dtype, missing_val=missing_val
                    )
    return output

def group_by1d(ar, keys, sorting=None, return_indices=False):
    """
    Splits an array by keys
    :param ar:
    :type ar:
    :param keys:
    :type keys:
    :param sorting:
    :type sorting:
    :return:
    :rtype:
    """
    uinds, sorting, mask = unique(keys, sorting=sorting, return_inverse=True)
    _, _, inds = unique(mask[sorting], sorting=np.arange(len(mask)), return_index=True)
    groups = np.split(ar[sorting,], inds)[1:]
    ret = ((uinds, groups), sorting)
    if return_indices:
        ret += (inds,)
    return ret

def group_by(ar, keys, sorting=None, return_indices=False):
    """
    Groups an array by keys
    :param ar:
    :type ar:
    :param keys:
    :type keys:
    :param sorting:
    :type sorting:
    :return: group pairs & sorting info
    :rtype:
    """
    ar = np.asanyarray(ar)
    keys = np.asanyarray(keys)
    if keys.ndim == 1:
        ret = group_by1d(ar, keys, sorting=sorting, return_indices=return_indices)
        return ret
    keys, dtype, orig_shape, orig_dtype = coerce_dtype(keys)
    output = group_by1d(ar, keys, sorting=sorting, return_indices=return_indices)
    ukeys, groups = output[0]
    ukeys = uncoerce_dtype(ukeys, orig_shape, orig_dtype, None)
    output = ((ukeys, groups),) + output[1:]
    return output

def split_by_regions1d(ar, regions, sortings=None, return_indices=False):
    """
    :param regions:
    :type regions:
    :param ar:
    :type ar:
    :return:
    :rtype:
    """
    if sortings is None:
        sortings = (None, None)
    ar_sorting, region_sorting = sortings
    if ar_sorting is None:
        ar_sorting = argsort(ar)
    ar = ar[ar_sorting]
    if region_sorting is None:
        region_sorting = argsort(regions)
    insertion_spots = np.searchsorted(regions, ar, sorter=region_sorting)
    uinds, _, inds = unique(insertion_spots, sorting=np.arange(len(insertion_spots)), return_index=True)
    groups = np.split(ar, inds)[1:]
    output = (uinds, groups)
    if return_indices:
        return output, inds, sortings
    else:
        return output, sortings

def split_by_regions(ar, regions, sortings=None, return_indices=False):
    """
    Splits an array up by edges defined by regions.
    Operates in 1D but can take compound dtypes using lexicographic
    ordering.
    In that case it is on the user to ensure that lex ordering is what is desired.
    :param ar:
    :type ar:
    :param regions:
    :type regions:
    :param sortings:
    :type sortings:
    :return:
    :rtype:
    """
    ar = np.asanyarray(ar)
    regions = np.asanyarray(regions)
    if ar.ndim == 1:
        ret = split_by_regions1d(ar, regions, sortings=sortings, return_indices=return_indices)
        return ret
    ar, dtype, orig_shape, orig_dtype = coerce_dtype(ar)
    regions, dtype, orig_shape1, orig_dtype1 = coerce_dtype(regions, dtype=dtype)
    output = split_by_regions1d(ar, regions, sortings=sortings, return_indices=return_indices)
    uinds, groups = output[0]
    groups = uncoerce_dtype(groups, orig_shape, orig_dtype, None)
    output = ((uinds, groups),) + output[1:]
...
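Here coerce_dtype and uncoerce_dtype are imported from elsewhere in the repository, so their implementation is not shown. A common way to get this kind of row-to-scalar coercion (an assumption about the technique, not the repository's actual code) is to view each contiguous row of a 2D array as a single void scalar, so that 1D set operations like argsort, unique, and searchsorted apply to whole rows. pack_rows and unpack_rows below are hypothetical names for illustration.

import numpy as np

def pack_rows(ar):
    # View each row as one opaque void element; returns the packed 1D
    # array plus the original shape/dtype needed to undo the view.
    ar = np.ascontiguousarray(ar)
    void_dt = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
    return ar.view(void_dt).ravel(), ar.shape, ar.dtype

def unpack_rows(packed, orig_shape, orig_dtype):
    # Reverse the void view back into rows of the original dtype.
    return packed.view(orig_dtype).reshape(-1, orig_shape[1])

rows = np.array([[1, 2], [3, 4], [1, 2]])
packed, shape, dt = pack_rows(rows)
uniq = np.unique(packed)             # row-wise unique via the packed view
print(unpack_rows(uniq, shape, dt))  # [[1 2] [3 4]]

Note that void elements compare bytewise, so the sort order of the packed view is lexicographic over raw bytes rather than numeric; that is fine for uniqueness and membership, which is how the snippet above uses it.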


uber_model.py

Source: uber_model.py (GitHub)


import importlib
import logging

import pandas as pd

from .parser import Parser


class UberModel(object):
    """
    Collection of static methods used across all the ubertool models.
    """

    def __init__(self):
        """Main utility class for building Ubertool model classes for model execution."""
        super(UberModel, self).__init__()
        self.name = self.__class__.__name__
        self.pd_obj = None
        self.pd_obj_exp = None
        self.pd_obj_out = None

    def validate_input_names(self, model_inputs, user_inputs):
        """
        Compare the user-supplied inputs with the ModelInputs() class attributes, ensuring they match by name
        :param model_inputs: ModelInputs() class instance
        :return: Boolean
        """
        # Create temporary DataFrame where each column name is the same as ModelInputs attributes
        df = pd.DataFrame()
        for input_param in model_inputs.__dict__:
            df[input_param] = getattr(self, input_param)
        keys_a = set(df.keys())
        keys_b = set(self.pd_obj.keys())
        extras = keys_b - keys_a
        n_extras = len(extras)
        print(f'There are {n_extras} extra keys.')
        if n_extras > 0:
            print(extras)
        missing = keys_a - keys_b
        n_missing = len(missing)
        print(f'There are {n_missing} missing keys.')
        if n_missing > 0:
            print(missing)
        # Compare column names of the temporary DataFrame (created above) to the user-supplied DataFrame from JSON
        # if df.columns.sort_values().equals(user_inputs.columns.sort_values()):
        if n_extras >= 0 and n_missing == 0:
            print('Input parameters match what is expected.')
            print(set(df.keys()))
            return True
        else:
            print('Input parameters do not have all required inputs.')
            msg_err1 = "Input parameters do not have all required inputs. Please see API documentation.\n"
            msg_err2 = "Expected: \n{}\n".format(df.columns.sort_values())
            msg_err3 = "Received: \n{}\n".format(self.pd_obj.columns.sort_values())
            missing = [item for item in keys_a if item not in keys_b]
            msg_missing = "missing the following field(s): \n{}\n".format(missing)
            extras = [item for item in keys_b if item not in keys_a]
            msg_extras = "the following extra field(s) were found: \n{}\n".format(extras)
            print(msg_err1 + msg_err2 + msg_err3 + msg_missing + msg_extras)
            raise ValueError(msg_err1 + msg_err2 + msg_err3 + msg_missing + msg_extras)

    def coerce_input_dtype(self, incoming_dtype, coerce_dtype, input_series):
        if coerce_dtype == 'object':
            return input_series.astype('object')
        elif coerce_dtype == 'float64':
            if incoming_dtype == 'object':
                # coerces unparseable strings to np.nan
                return pd.to_numeric(input_series, errors='coerce')
            elif incoming_dtype == 'float64':
                return input_series
            else:
                return input_series.astype('float64')
        elif coerce_dtype in ('int64', 'int32'):
            if incoming_dtype == 'object':
                # coerces unparseable strings to np.nan
                return pd.to_numeric(input_series, errors='coerce', downcast='integer')
            else:
                return input_series.astype('int64')
        else:
            print("dtype of {} is {}\n"
                  "This format is not handled by UberModel.coerce_input_dtype()".format(input_series.name, coerce_dtype))
            return input_series

    @staticmethod
    def convert_index(df_in):
        """ Attempt to convert indices of input DataFrame to duck-typed dtype """
        parser = Parser(df_in)
        df = parser.convert_axes()
        return df

    def populate_inputs(self, df_in):
        """
        Validate and assign user-provided model inputs to their respective class attributes
        :param df_in: Pandas DataFrame object of model input parameters
        """
        df_user = self.convert_index(df_in)
        # mod_name = self.name.lower() + '.' + self.name.lower() + '_exe'
        mod_name = "pram_flask.ubertool.ubertool." + self.name.lower() + "." + self.name.lower() + '_exe'
        try:
            # Import the model's input class (e.g. TedInputs) to compare user-supplied inputs to
            module = importlib.import_module(mod_name)
            model_inputs_class = getattr(module, self.name + "Inputs")
            model_inputs = model_inputs_class()
        except ValueError as err:
            logging.info(mod_name)
            logging.info(err.args)
        try:
            if self.validate_input_names(model_inputs, df_user):
                # If the user-supplied DataFrame has the same column names as required by ModelInputs,
                # set each Series in the DataFrame to the corresponding ModelInputs attribute (member variable)
                for column in df_user.columns:
                    coerce_dtype = str(getattr(model_inputs, column).dtype)
                    df_series = df_user[column]
                    initial_dtype = str(df_series.dtype)
                    setattr(self, column, self.coerce_input_dtype(initial_dtype, coerce_dtype, df_series))
        except ValueError as err:
            logging.info('input validation problem')
            logging.info(err.args)

    def populate_outputs(self):
        """
        Create and return a Model Output DataFrame where each column name is the same as a
        *ModelName*Outputs attribute (i.e. a model output parameter)
        """
        # Import the model's output class (e.g. TerrplantOutputs) to create a DataFrame to store the model outputs in
        mod_name = self.name.lower() + '.' + self.name.lower() + '_exe'
        # mod_name = "ubertool_ecorest.ubertool.ubertool." + self.name.lower() + "." + self.name.lower() + '_exe'
        module = importlib.import_module(mod_name)
        model_outputs = getattr(module, self.name + "Outputs")
        model_outputs_obj = model_outputs()
        df = pd.DataFrame()
        for input_param in model_outputs_obj.__dict__:
            df[input_param] = getattr(self, input_param)
            setattr(self, input_param, df[input_param])
        return df

    def fill_output_dataframe(self):
        """ Combine all output properties into a Pandas DataFrame """
        for column in self.pd_obj_out.columns:
            try:
                output = getattr(self, column)
                if isinstance(output, pd.Series):
                    # Ensure model output is a Pandas Series. Only Series can be
                    # reliably put into a Pandas DataFrame.
                    self.pd_obj_out[column] = output
                else:
                    print('"{}" is not a Pandas Series. Returned outputs must be a Pandas Series'.format(column))
            except Exception:
                print("output dataframe error on " + column)
        return

    def get_dict_rep(self):
        """
        Convert DataFrames to dictionaries, returning a tuple (inputs, outputs, exp_out)
        :return: (dict(input DataFrame), dict(outputs DataFrame), dict(expected outputs DataFrame))
        """
        if self.name.lower() == "varroapop":
            try:
                return self.to_dict(self.pd_obj), \
                       self.pd_obj_out.to_dict('list'), \
                       self.pd_obj_exp.to_dict('list')
            except AttributeError:
                return self.to_dict(self.pd_obj), \
                       self.pd_obj_out.to_dict('list'), \
                       {}
        elif self.name.lower() == "sam":
            try:
                return self.to_dict(self.pd_obj), \
                       self.pd_obj_out, \
                       self.pd_obj_exp
            except AttributeError as ex:
                return self.to_dict(self.pd_obj), \
                       {"error": str(ex)}, \
                       {}
        try:
            return self.to_dict(self.pd_obj), \
                   self.to_dict(self.pd_obj_out), \
                   self.to_dict(self.pd_obj_exp)
        except AttributeError:
            return self.to_dict(self.pd_obj), \
                   self.to_dict(self.pd_obj_out), \
                   {}

    @staticmethod
    def to_dict(df):
        """
        This is an override of the pd.DataFrame.to_dict() method where the keys in the
        returned dictionary are cast to strings. This fixes an error where duck typing would
        sometimes allow non-string keys, which fails when Flask serializes the dictionary to
        a JSON string to return the HTTP response.
        Original method returned: dict((str(k), v.to_dict()) for k, v in compat.iteritems(df))
        :param df:
        :return:
        """
        out = {}
        for k, v in df.items():
            out[str(k)] = {str(row): value for row, value in v.items()}
        return out


class ModelSharedInputs(object):
    def __init__(self):
        """
        Container for the shared model inputs amongst most models (e.g. version, chemical name, & PC Code)
        """
        super(ModelSharedInputs, self).__init__()
        self.csrfmiddlewaretoken = pd.Series([], dtype="object")
        self.version = pd.Series([], dtype="object")
        self.chemical_name = pd.Series([], dtype="object")
...
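A quick usage sketch of the coerce_input_dtype method above, assuming the UberModel class from this listing is importable; the series name and values are hypothetical. Strings that cannot be parsed become NaN when coercing object data to float64:

import pandas as pd

model = UberModel()
raw = pd.Series(["1.5", "oops", "3"], name="application_rate")
coerced = model.coerce_input_dtype(str(raw.dtype), "float64", raw)
print(coerced)        # 1.5, NaN, 3.0
print(coerced.dtype)  # float64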

bwypy.py

Source: bwypy.py (GitHub)

try:
    import ujson as jsonlib
except ImportError:
    import json as jsonlib
import pandas as pd
import logging
import time
import urllib


class Bwypy:
    default = {"database": "", "search_limits": [],
               "words_collation": "Case_Sensitive", "compare_limits": [],
               "method": "return_json",
               "counttype": ["TextCount", "WordCount"], "groups": []}

    def __init__(self, endpoint, db):
        self.endpoint = endpoint
        self.default['database'] = db
        self._fields = None

    @property
    def fields(self):
        '''
        Return a Pandas object with all the fields in a Bookworm
        '''
        if self._fields is None:
            q = {'database': self.default['database'],
                 'method': 'returnPossibleFields'}
            obj = self._fetch(q)
            df = pd.DataFrame(obj)
            self._fields = df
        return self._fields

    def stats(self):
        q = self.default.copy()
        # Let's hope nobody creates a bookworm on the history of the universe:
        q['search_limits'] = [{"date_year": {"$lte": 10000}}]
        return self.search(q)

    def search(self, query, coerce_dtype=True):
        logging.debug(query)
        response = self._fetch(query, type="json")[0]
        rows = self._expand(response, query['groups'], query['counttype'])
        '''
        # Until Pandas supports compound dtype statements, this type coercion
        # is pointless, so using convert_objects instead.
        # Watch https://github.com/pydata/pandas/issues/4464
        if coerce_dtype:
            # Get expected datatypes from DB
            fields = bw.fields
            # Key for sql dtypes => Pandas
            fieldkey = {"integer": "int64", "character": "string"}
            counttype_dtypes = {"TextCount": "uint64",
                                "WordCount": "uint64",
                                "WordsPerMillion": "float32",
                                "TextPercent": "float16"}
            db_group_dtypes = [fields[fields.dbname == gname]['type'].iloc[0]
                               for gname in q['groups']]
            count_dtypes = [counttype_dtypes[fieldname]
                            for fieldname in q['counttype']]
            dtypes = [fieldkey[db_dtype] for db_dtype in db_group_dtypes] + count_dtypes
            df = pd.DataFrame(rows, dtype)
        else:
            df = pd.DataFrame(rows)
        '''
        df = pd.DataFrame(rows)
        if coerce_dtype:
            ''' Copying objects. Hmmm... '''
            df = df.convert_objects(convert_numeric=True)
        if len(query['groups']) > 0:
            df.set_index(query['groups'], inplace=True)
        return df

    def _fetch(self, query, type="pandas"):
        ''' Get results from a Bookworm server
        This method calls JSON and converts to Pandas, rather than using
        Bookworm's built-in DataFrame return method, as JSON is a more
        transparent and safer format for data interchange.
        '''
        start = time.time()
        # Regardless of how the query is provided, coerce to 'return_json'
        # query['method'] = 'return_json'  # BREAKS CALLS TO self.fields()
        qurl = "%s?queryTerms=%s" % (self.endpoint, jsonlib.dumps(query))
        try:
            f = urllib.urlopen(qurl)
            response = jsonlib.loads(f.read())
        except AttributeError:
            # Python 3 has no urllib.urlopen; being lazy here
            import requests
            r = requests.get(qurl, verify=False)
            response = r.json()
        if type == "pandas":
            response = pd.DataFrame(response)
        elif type == "json":
            pass
        logging.debug("Query time: %ds" % (time.time() - start))
        return response

    def _expand(self, o, grouplist, counttypes, collector=[]):
        '''
        A recursive method for exploding results into rows, one line per set of
        facets
        '''
        new_coll = []
        if len(grouplist) == 0:
            l = []
            for i, val in enumerate(o):
                counttype = counttypes[i]
                l += [(counttype, val)]
            return [dict(collector + l)]
        else:
            l = []
            for k, v in o.items():
                item = (grouplist[0], k)
                new_coll = collector + [item]
                l += self._expand(v, grouplist[1:], counttypes, new_coll)
...
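Note that DataFrame.convert_objects, used in the coerce_dtype=True branch of search() above, was deprecated and later removed from pandas. A rough modern stand-in (an assumption for illustration, not part of bwypy) is per-column coercion with pd.to_numeric, keeping a column's original values when nothing in it parses as numeric:

import pandas as pd

def coerce_numeric_columns(df):
    # Approximates the old convert_objects(convert_numeric=True) behavior:
    # convert each column to numeric where possible, otherwise leave it alone.
    out = df.copy()
    for col in out.columns:
        converted = pd.to_numeric(out[col], errors="coerce")
        # keep the conversion only when it didn't wipe out every value
        if not converted.isna().all() or out[col].isna().all():
            out[col] = converted
    return out

rows = pd.DataFrame({"date_year": ["1900", "1901"],
                     "TextCount": ["10", "20"],
                     "author": ["a", "b"]})
print(coerce_numeric_columns(rows).dtypes)  # date_year/TextCount become int64, author stays object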
