How to use test_logfile method in avocado

Best Python code snippet using avocado_python

LogFile.py

Source: LogFile.py (GitHub)

1"""2 Author: Stephen Pauwels3"""4import copy5import multiprocessing as mp6import numpy as np7import pandas as pd8from dateutil.parser import parse9import math10class LogFile:11 def __init__(self, filename, delim, header, rows, time_attr, trace_attr, activity_attr = None, time_format = None, cycle_attr= None, values = None, integer_input = False, convert = True, k = 1, dtype=None):12 self.filename = filename13 self.time = time_attr14 self.trace = trace_attr15 self.activity = activity_attr16 self.time_format = time_format17 self.cycle_attr = cycle_attr18 if values is not None:19 self.values = values20 else:21 self.values = {}22 self.numericalAttributes = set()23 self.categoricalAttributes = set()24 self.ignoreHistoryAttributes = set()25 if self.trace is None:26 self.k = 027 else:28 self.k = k29 type = "str"30 if integer_input:31 type = "int"32 if filename is not None:33 if dtype is not None:34 self.data = pd.read_csv(self.filename, header=header, nrows=rows, delimiter=delim, encoding='latin-1', dtype=dtype)35 else:36 self.data = pd.read_csv(self.filename, header=header, nrows=rows, delimiter=delim, encoding='latin-1')37 # Determine types for all columns - numerical or categorical38 for col_type in self.data.dtypes.iteritems():39 if col_type[1] == 'float64':40 self.numericalAttributes.add(col_type[0])41 else:42 self.categoricalAttributes.add(col_type[0])43 if convert:44 self.convert2int()45 self.contextdata = None46 def get_data(self):47 if self.contextdata is None:48 return self.data49 return self.contextdata50 51 def int_convert(self):52 if self.cycle_attr != None:53 cycle_targets = self.data[self.cycle_attr].unique()54 cycle_map = {name: n+1 for n, name in enumerate(cycle_targets)}55 self.data[self.cycle_attr] = self.data[self.cycle_attr].replace(cycle_map)56 targets = self.data[self.activity].unique()57 map_to_int = {name: n+1 for n, name in enumerate(targets)}58 int_to_map = {n+1: name for n, name in enumerate(targets)}59 60 self.data[self.activity] = self.data[self.activity].replace(map_to_int)61 return int_to_map62 def create_subset(self, percentage):63 cases = self.data[self.trace].unique()64 amount_cases_subset = round(len(cases) * (percentage / 100))65 subset_data = self.data[self.data[self.trace].isin(cases[:amount_cases_subset])]66 67 sub_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.time_format, self.cycle_attr, self.values, False, False)68 sub_logfile.filename = self.filename69 sub_logfile.values = self.values70 sub_logfile.time_format = self.time_format71 sub_logfile.contextdata = None72 sub_logfile.categoricalAttributes = self.categoricalAttributes73 sub_logfile.numericalAttributes = self.numericalAttributes74 sub_logfile.data = subset_data75 sub_logfile.k = self.k76 return sub_logfile77 def delete_end_activity(self):78 self.data = self.data[self.data[self.activity] != 'End']79 def get_cases(self):80 return self.get_data().groupby([self.trace])81 82 def set_k_longest_trace(self):83 cases = self.data.groupby([self.trace])84 for case in cases:85 if len(case[1]) > self.k:86 self.k = len(case[1])87 88 def filter_case_length(self, min_length):89 cases = self.data.groupby([self.trace])90 filtered_cases = []91 for case in cases:92 if len(case[1]) > min_length:93 filtered_cases.append(case[1])94 self.data = pd.concat(filtered_cases, ignore_index=True)95 def convert2int(self):96 self.convert2ints("../converted_ints.csv")97 def convert2ints(self, file_out):98 """99 Convert csv file with string values to csv file with integer values.100 
(File/string operations more efficient than pandas operations)101 :param file_out: filename for newly created file102 :return: number of lines converted103 """104 self.data = self.data.apply(lambda x: self.convert_column2ints(x))105 self.data.to_csv(file_out, index=False)106 107 def add_start_date(self):108 unique_cases = self.data[self.trace].unique()109 mapper_dict = {}110 for i in unique_cases:111 df_case = self.data[self.data[self.trace] == i]112 start_time = df_case.iloc[0][self.time]113 mapper_dict[i] = start_time114 self.data['Start Date'] = self.data[self.trace].map(mapper_dict)115 def convert_column2ints(self, x):116 def test(a, b):117 # Return all elements from a that are not in b, make use of the fact that both a and b are unique and sorted118 a_ix = 0119 b_ix = 0120 new_uniques = []121 while a_ix < len(a) and b_ix < len(b):122 if a[a_ix] < b[b_ix]:123 new_uniques.append(a[a_ix])124 a_ix += 1125 elif a[a_ix] > b[b_ix]:126 b_ix += 1127 else:128 a_ix += 1129 b_ix += 1130 if a_ix < len(a):131 new_uniques.extend(a[a_ix:])132 return new_uniques133 if self.isNumericAttribute(x.name):134 return x135 if self.time is not None and x.name == self.time:136 return x137 print("PREPROCESSING: Converting", x.name)138 if x.name not in self.values:139 x = x.astype("str")140 self.values[x.name], y = np.unique(x, return_inverse=True)141 return y + 1142 else:143 x = x.astype("str")144 self.values[x.name] = np.append(self.values[x.name], test(np.unique(x), self.values[x.name]))145 print("PREPROCESSING: Substituting values with ints")146 xsorted = np.argsort(self.values[x.name])147 ypos = np.searchsorted(self.values[x.name][xsorted], x)148 indices = xsorted[ypos]149 return indices + 1150 def convert_string2int(self, column, value):151 if column not in self.values:152 return value153 vals = self.values[column]154 found = np.where(vals==value)155 if len(found[0]) == 0:156 return None157 else:158 return found[0][0] + 1159 def convert_int2string(self, column, int_val):160 if column not in self.values:161 return int_val162 return self.values[column][int_val - 1]163 def attributes(self):164 return self.data.columns165 def keep_attributes(self, keep_attrs):166 if self.time and self.time not in keep_attrs and self.time in self.data:167 keep_attrs.append(self.time)168 if self.trace and self.trace not in keep_attrs:169 keep_attrs.append(self.trace)170 self.data = self.data[keep_attrs]171 def remove_attributes(self, remove_attrs):172 """173 Remove attributes with the given prefixes from the data174 :param remove_attrs: a list of prefixes of attributes that should be removed from the data175 :return: None176 """177 remove = []178 for attr in self.data:179 for prefix in remove_attrs:180 if attr.startswith(prefix):181 remove.append(attr)182 break183 self.data = self.data.drop(remove, axis=1)184 def filter(self, filter_condition):185 self.data = self.data[eval(filter_condition)]186 def filter_copy(self, filter_condition):187 log_copy = copy.deepcopy(self)188 log_copy.data = self.data[eval(filter_condition)]189 return log_copy190 def get_column(self, attribute):191 return self.data[attribute]192 def get_labels(self, label):193 labels = {}194 if self.trace is None:195 for row in self.data.itertuples():196 labels[row.Index] = getattr(row, label)197 else:198 traces = self.data.groupby([self.trace])199 for trace in traces:200 labels[trace[0]] = getattr(trace[1].iloc[0], label)201 return labels202 def create_trace_attribute(self):203 print("Create trace attribute")204 with mp.Pool(mp.cpu_count()) as p:205 result = 
p.map(self.create_trace_attribute_case, self.data.groupby([self.trace]))206 self.data = pd.concat(result)207 self.categoricalAttributes.add("trace")208 def create_trace_attribute_case(self, case_tuple):209 trace = []210 case_data = pd.DataFrame()211 for row in case_tuple[1].iterrows():212 row_content = row[1]213 trace.append(row_content[self.activity])214 row_content["trace"] = str(trace)215 case_data = case_data.append(row_content)216 return case_data217 def create_k_context(self):218 """219 Create the k-context from the current LogFile220 :return: None221 """222 print("Create k-context:", self.k)223 if self.k == 0:224 self.contextdata = self.data225 if self.contextdata is None:226 # result = map(self.create_k_context_trace, self.data.groupby([self.trace]))227 with mp.Pool(mp.cpu_count()) as p:228 result = p.map(self.create_k_context_trace, self.data.groupby([self.trace]))229 # result = map(self.create_k_context_trace, self.data.groupby([self.trace]))230 self.contextdata = pd.concat(result, ignore_index=True)231 def create_k_context_trace(self, trace):232 contextdata = pd.DataFrame()233 trace_data = trace[1]234 shift_data = trace_data.shift().fillna(0)235 shift_data.at[shift_data.first_valid_index(), self.trace] = trace[0]236 joined_trace = shift_data.join(trace_data, lsuffix="_Prev0")237 for i in range(1, self.k + 1):238 shift_data = shift_data.shift().fillna(0)239 shift_data.at[shift_data.first_valid_index(), self.trace] = trace[0]240 joined_trace = shift_data.join(joined_trace, lsuffix="_Prev%i" % i)241 contextdata = contextdata.append(joined_trace, ignore_index=True)242 contextdata = contextdata.astype("int", errors="ignore")243 return contextdata244 def add_duration_to_k_context(self):245 """246 Add durations to the k-context, only calculates if k-context has been calculated247 :return:248 """249 if self.contextdata is None:250 return251 for i in range(self.k):252 self.contextdata['duration_%i' %(i)] = self.contextdata.apply(self.calc_duration, axis=1, args=(i,))253 self.numericalAttributes.add("duration_%i" % (i))254 def calc_duration(self, row, k):255 if row[self.time + "_Prev%i" % (k)] != 0:256 startTime = parse(self.convert_int2string(self.time, int(row[self.time + "_Prev%i" % (k)])))257 endTime = parse(self.convert_int2string(self.time,int(row[self.time])))258 return (endTime - startTime).total_seconds()259 else:260 return 0261 def discretize(self,row, bins=25):262 if isinstance(bins, int):263 labels = [str(i) for i in range(1,bins+1)]264 else:265 labels = [str(i) for i in range(1,len(bins))]266 if self.isNumericAttribute(row):267 self.numericalAttributes.remove(row)268 self.categoricalAttributes.add(row)269 self.contextdata[row], binned = pd.cut(self.contextdata[row], bins, retbins=True, labels=labels)270 #self.contextdata[row] = self.contextdata[row].astype(str)271 #self.contextdata[row] = self.convert_column2ints(self.contextdata[row])272 return binned273 def isNumericAttribute(self, attribute):274 if attribute in self.numericalAttributes:275 return True276 else:277 for k in range(self.k):278 if attribute.replace("_Prev%i" % (k), "") in self.numericalAttributes:279 return True280 return False281 def isCategoricalAttribute(self, attribute):282 if attribute in self.categoricalAttributes:283 return True284 else:285 for k in range(self.k):286 if attribute.replace("_Prev%i" % (k), "") in self.categoricalAttributes:287 return True288 return False289 def add_end_events(self):290 cases = self.get_cases()291 print("Run end event map")292 with mp.Pool(mp.cpu_count()) as p:293 result = 
p.map(self.add_end_event_case, cases)294 print("Combine results")295 new_data = []296 for r in result:297 new_data.extend(r)298 self.data = pd.DataFrame.from_records(new_data)299 def add_end_event_case(self, case_obj):300 case_name, case = case_obj301 new_data = []302 for i in range(0, len(case)):303 new_data.append(case.iloc[i].to_dict())304 record = {}305 for col in self.data:306 if col == self.trace:307 record[col] = case_name308 elif col == self.time:309 record[col] = new_data[-1][self.time]310 else:311 record[col] = "End"312 new_data.append(record)313 return new_data314 315 def create_split_df(self):316 split_data = self.data[(self.data[self.activity] == self.data[self.activity].iloc[0]) | (self.data[self.activity] == 'End')]317 split_data[self.time] = pd.to_datetime(split_data[self.time])318 split_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.time_format, self.cycle_attr, self.values, False, False)319 split_logfile.filename = self.filename320 split_logfile.values = self.values321 split_logfile.time_format = self.time_format322 split_logfile.contextdata = split_data323 split_logfile.categoricalAttributes = self.categoricalAttributes324 split_logfile.numericalAttributes = self.numericalAttributes325 split_logfile.data = split_data326 split_logfile.k = self.k327 return split_logfile328 def split_train_test(self, split_interval, type):329 from sklearn.model_selection import train_test_split330 data = self.data[(self.data[self.activity] == self.data[self.activity].iloc[0]) | (self.data[self.activity] == 'End')]331 data[self.time] = pd.to_datetime(data[self.time])332 best_split = None333 loss = len(self.data)334 335 for i in split_interval:336 if type == 'lstm':337 train, test = train_test_split(self.contextdata[self.trace].unique(), test_size=(100-i)/100, shuffle=False)338 train_data = self.contextdata[self.contextdata[self.trace].isin(train)]339 test_data = self.contextdata[self.contextdata[self.trace].isin(test)]340 else:341 train, test = train_test_split(self.data[self.trace].unique(), test_size=(100-i)/100, shuffle=False)342 train_data = self.data[self.data[self.trace].isin(train)]343 test_data = self.data[self.data[self.trace].isin(test)]344 345 overlap = train_data[train_data[self.time] > test_data[self.time].min()][[self.trace]]346 347 if len(overlap) < loss:348 best_split = i349 loss = len(overlap)350 best_train = train_data[~train_data[self.trace].isin(overlap[self.trace].tolist())]351 best_test = test_data352 353 354 print('Train data lost due to overlap: ' + str(len(overlap)/len(train_data)) + "/n Best Split: " + str(best_split) )355 train_data_context = self.contextdata[self.contextdata[self.trace].isin(best_train[self.trace].unique())]356 test_data_context = self.contextdata[self.contextdata[self.trace].isin(best_test[self.trace].unique())]357 train_data = self.data[self.data[self.trace].isin(best_train[self.trace].unique())]358 test_data = self.data[self.data[self.trace].isin(best_test[self.trace].unique())]359 360 train_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.time_format, self.cycle_attr, self.values, False, False)361 train_logfile.filename = self.filename362 train_logfile.values = self.values363 train_logfile.time_format = self.time_format364 train_logfile.contextdata = train_data_context365 train_logfile.categoricalAttributes = self.categoricalAttributes366 train_logfile.numericalAttributes = self.numericalAttributes367 train_logfile.data = train_data368 train_logfile.k = self.k369 
test_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.time_format, self.cycle_attr, self.values, False, False)370 test_logfile.filename = self.filename371 test_logfile.values = self.values372 test_logfile.time_format = self.time_format373 test_logfile.contextdata = test_data_context374 test_logfile.categoricalAttributes = self.categoricalAttributes375 test_logfile.numericalAttributes = self.numericalAttributes376 test_logfile.data = test_data377 test_logfile.k = self.k378 return train_logfile, test_logfile379 def splitTrainTest(self, train_percentage, split_case=True, method="train-test"):380 import random381 train_percentage = train_percentage / 100.0382 if split_case:383 if method == "random":384 train_inds = random.sample(range(self.data.shape[0]), k=round(self.contextdata.shape[0] * train_percentage))385 test_inds = list(set(range(self.data.shape[0])).difference(set(train_inds)))386 elif method == "train-test":387 train_inds = np.arange(0, self.data.shape[0] * train_percentage)388 test_inds = list(set(range(self.data.shape[0])).difference(set(train_inds)))389 else:390 test_inds = np.arange(0, self.data.shape[0] * (1 - train_percentage))391 train_inds = list(set(range(self.data.shape[0])).difference(set(test_inds)))392 else:393 train_inds = []394 test_inds = []395 cases = self.data[self.trace].unique()396 if method == "random":397 train_cases = random.sample(list(cases), k=round(len(cases) * train_percentage))398 test_cases = list(set(cases).difference(set(train_cases)))399 elif method == "train-test":400 train_cases = cases[:round(len(cases) * train_percentage)]401 test_cases = cases[round(len(cases) * train_percentage):]402 else:403 train_cases = cases[round(len(cases) * (1 - train_percentage)):]404 test_cases = cases[:round(len(cases) * (1 - train_percentage))]405 for train_case in train_cases:406 train_inds.extend(list(self.data[self.data[self.trace] == train_case].index))407 for test_case in test_cases:408 test_inds.extend(list(self.data[self.data[self.trace] == test_case].index))409 train = self.data.loc[train_inds]410 test = self.data.loc[test_inds]411 print("Train:", len(train_inds))412 print("Test:", len(test_inds))413 train_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)414 train_logfile.filename = self.filename415 train_logfile.values = self.values416 train_logfile.contextdata = train417 train_logfile.categoricalAttributes = self.categoricalAttributes418 train_logfile.numericalAttributes = self.numericalAttributes419 train_logfile.data = self.data.loc[train_inds]420 train_logfile.k = self.k421 test_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)422 test_logfile.filename = self.filename423 test_logfile.values = self.values424 test_logfile.contextdata = test425 test_logfile.categoricalAttributes = self.categoricalAttributes426 test_logfile.numericalAttributes = self.numericalAttributes427 test_logfile.data = self.data.loc[test_inds]428 test_logfile.k = self.k429 return train_logfile, test_logfile430 def split_days(self, date_format, num_days=1):431 from datetime import datetime432 self.contextdata["days"] = self.contextdata[self.time].map(lambda l: str(datetime.strptime(l, date_format).isocalendar()[:3]))433 days = {}434 for group_name, group in self.contextdata.groupby("days"):435 new_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)436 new_logfile.filename = 
self.filename437 new_logfile.values = self.values438 new_logfile.categoricalAttributes = self.categoricalAttributes439 new_logfile.numericalAttributes = self.numericalAttributes440 new_logfile.k = self.k441 new_logfile.contextdata = group.drop("days", axis=1)442 new_logfile.data = new_logfile.contextdata[self.attributes()]443 days[group_name] = {}444 days[group_name]["data"] = new_logfile445 return days446 def split_weeks(self, date_format, num_days=1):447 from datetime import datetime448 self.contextdata["year_week"] = self.contextdata[self.time].map(lambda l: str(datetime.strptime(l, date_format).isocalendar()[:2]))449 weeks = {}450 for group_name, group in self.contextdata.groupby("year_week"):451 new_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)452 new_logfile.filename = self.filename453 new_logfile.values = self.values454 new_logfile.categoricalAttributes = self.categoricalAttributes455 new_logfile.numericalAttributes = self.numericalAttributes456 new_logfile.k = self.k457 new_logfile.contextdata = group.drop("year_week", axis=1)458 new_logfile.data = new_logfile.contextdata[self.attributes()]459 year, week = eval(group_name)460 group_name = "%i/" % year461 if week < 10:462 group_name += "0"463 group_name += str(week)464 weeks[group_name] = {}465 weeks[group_name]["data"] = new_logfile466 return weeks467 def split_months(self, date_format, num_days=1):468 from datetime import datetime469 self.contextdata["month"] = self.contextdata[self.time].map(lambda l: str(datetime.strptime(l, date_format).strftime("%Y/%m")))470 months = {}471 for group_name, group in self.contextdata.groupby("month"):472 new_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)473 new_logfile.filename = self.filename474 new_logfile.values = self.values475 new_logfile.categoricalAttributes = self.categoricalAttributes476 new_logfile.numericalAttributes = self.numericalAttributes477 new_logfile.k = self.k478 new_logfile.contextdata = group.drop("month", axis=1)479 new_logfile.data = new_logfile.contextdata[self.attributes()]480 months[group_name] = {}481 months[group_name]["data"] = new_logfile482 return months483 def split_date(self, date_format, year_week, from_week=None):484 from datetime import datetime485 self.contextdata["year_week"] = self.contextdata[self.time].map(lambda l: str(datetime.strptime(l, date_format).isocalendar()[:2]))486 if from_week:487 train = self.contextdata[(self.contextdata["year_week"] >= from_week) & (self.contextdata["year_week"] < year_week)]488 else:489 train = self.contextdata[self.contextdata["year_week"] < year_week]490 test = self.contextdata[self.contextdata["year_week"] == year_week]491 train_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)492 train_logfile.filename = self.filename493 train_logfile.values = self.values494 train_logfile.contextdata = train495 train_logfile.categoricalAttributes = self.categoricalAttributes496 train_logfile.numericalAttributes = self.numericalAttributes497 train_logfile.data = train[self.attributes()]498 train_logfile.k = self.k499 test_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)500 test_logfile.filename = self.filename501 test_logfile.values = self.values502 test_logfile.contextdata = test503 test_logfile.categoricalAttributes = self.categoricalAttributes504 test_logfile.numericalAttributes = 
self.numericalAttributes505 test_logfile.data = test[self.attributes()]506 test_logfile.k = self.k507 return train_logfile, test_logfile508 def create_folds(self, k):509 result = []510 folds = np.array_split(np.arange(0, self.contextdata.shape[0]), k)511 for f in folds:512 fold_context = self.contextdata.loc[f]513 logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)514 logfile.filename = self.filename515 logfile.values = self.values516 logfile.contextdata = fold_context517 logfile.categoricalAttributes = self.categoricalAttributes518 logfile.numericalAttributes = self.numericalAttributes519 logfile.data = self.data.loc[f]520 logfile.k = self.k521 result.append(logfile)522 return result523 def extend_data(self, log):524 train_logfile = LogFile(None, None, None, None, self.time, self.trace, self.activity, self.values, False, False)525 train_logfile.filename = self.filename526 train_logfile.values = self.values527 train_logfile.contextdata = self.contextdata.append(log.contextdata)528 train_logfile.categoricalAttributes = self.categoricalAttributes529 train_logfile.numericalAttributes = self.numericalAttributes530 train_logfile.data = self.data.append(log.data)531 train_logfile.k = self.k532 return train_logfile533 def get_traces(self):534 return [list(case[1][self.activity]) for case in self.get_cases()]535 def get_follows_relations(self, window=None):536 return self.get_traces_follows_relations(self.get_traces(), window)537 def get_traces_follows_relations(self, traces, window):538 follow_counts = {}539 counts = {}540 for trace in traces:541 for i in range(len(trace)):542 act = trace[i]543 if act not in follow_counts:544 follow_counts[act] = {}545 counts[act] = 0546 counts[act] += 1547 stop_value = len(trace)548 if window:549 stop_value = min(len(trace), i+window)550 for fol_act in set(trace[i+1:stop_value+1]):551 if fol_act not in follow_counts[act]:552 follow_counts[act][fol_act] = 0553 follow_counts[act][fol_act] += 1554 follows = {}555 for a in range(1, len(self.values[self.activity])+1):556 always = 0557 sometimes = 0558 if a in follow_counts:559 for b in follow_counts[a]:560 if a != b:561 if follow_counts[a][b] == counts[a]:562 always += 1563 else:564 sometimes += 1565 never = len(self.values[self.activity]) - always - sometimes566 follows[a] = (always, sometimes, never)567 return follows, follow_counts568 def get_relation_entropy(self):569 follows, _ = self.get_follows_relations()570 full_entropy = []571 for act in range(1, len(self.values[self.activity])+1):572 RC = follows[act]573 p_a = RC[0] / len(self.values[self.activity])574 p_s = RC[1] / len(self.values[self.activity])575 p_n = RC[2] / len(self.values[self.activity])576 entropy = 0577 if p_a != 0:578 entropy -= p_a * math.log(p_a)579 if p_s != 0:580 entropy -= p_s * math.log(p_s)581 if p_n != 0:582 entropy -= p_n * math.log(p_n)583 full_entropy.append(entropy)584 return full_entropy585 def get_j_measure_trace(self, trace, window):586 _, follows = self.get_traces_follows_relations([trace], window)587 j_measure = []588 value_counts = {}589 for e in trace:590 if e not in value_counts:591 value_counts[e] = 0592 value_counts[e] += 1593 for act_1 in range(1, len(self.values[self.activity])+1):594 for act_2 in range(1, len(self.values[self.activity]) + 1):595 num_events = len(trace)596 if act_1 in follows and act_2 in follows[act_1]:597 p_aFb = follows[act_1][act_2] / value_counts.get(act_1, 0)598 else:599 p_aFb = 0600 if act_1 not in value_counts:601 p_a = 0602 else:603 p_a = 
value_counts.get(act_1, 0)/ num_events604 if act_2 not in value_counts:605 p_b = 0606 else:607 p_b = value_counts.get(act_2, 0) / num_events608 j_value = 0609 if p_aFb != 0 and p_b != 0:610 j_value += p_aFb * math.log(p_aFb / p_b, 2)611 if p_aFb != 1 and p_b != 1:612 j_value += (1-p_aFb) * math.log((1-p_aFb) / (1-p_b), 2)613 j_measure.append(p_a * j_value)614 return j_measure615 def get_j_measure(self, window=5):616 traces = self.get_traces()617 # return [np.mean(self.get_j_measure_trace(trace, window)) for trace in traces]618 return [self.get_j_measure_trace(trace, window) for trace in traces]619 # j_measures = np.asarray([self.get_j_measure_trace(trace, window) for trace in traces])620 # avg_j_measures = [np.mean(j_measures[:,i]) for i in range(len(j_measures[0]))]621 # return avg_j_measures622def combine(logfiles):623 if len(logfiles) == 0:624 return None625 log = copy.deepcopy(logfiles[0])626 for i in range(1, len(logfiles)):627 log = log.extend_data(logfiles[i])...
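The splitTrainTest method above is where a test_logfile is actually produced: it returns a (train_logfile, test_logfile) pair of LogFile objects that share the value mappings of the original log. A minimal usage sketch, assuming a hypothetical event_log.csv with case, event and completeTime columns (the file name, column names and split percentage are illustrative assumptions, not part of the original source):

# Hypothetical CSV event log; the column names below are assumptions for illustration only.
log = LogFile("event_log.csv", ",", 0, None,
              time_attr="completeTime", trace_attr="case", activity_attr="event",
              convert=True, k=2)
log.create_k_context()  # build the k-context columns used by the context-based splits
train_logfile, test_logfile = log.splitTrainTest(70, split_case=False, method="train-test")
print("Train events:", len(train_logfile.data), "Test events:", len(test_logfile.data))

With split_case=False and method="train-test" the split is made per case in file order, so events of one case never end up in both halves; split_date and create_folds follow the same pattern and also hand back LogFile objects.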

run_varmisuse_benchs.py

Source: run_varmisuse_benchs.py (GitHub)

#!/usr/bin/env python
"""
Usage:
   run_varmisuse_benchs.py [options] LOG_TARGET_DIR

Options:
    -h --help         Show this screen.
    --num-runs NUM    Number of runs to perform for each configuration. [default: 5]
    --debug           Turn on debugger.
"""
import os
import subprocess
import re
import numpy as np
from docopt import docopt
from dpu_utils.utils import run_and_debug

MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"]
TEST_RES_RE = re.compile('^Metrics: Accuracy: (0.\d+)')
VALID_RES_RE = re.compile('Best validation results: Accuracy: (0.\d+)')
MODEL_FILE_RE = re.compile('^Loading model from file (.+)\.')


def run(args):
    target_dir = args['LOG_TARGET_DIR']
    os.makedirs(target_dir, exist_ok=True)
    print("Starting VarMisuse experiments, will write logfiles for runs into %s." % target_dir)
    num_seeds = int(args.get('--num-runs'))
    print("| %- 14s | %- 17s | %- 17s | %- 17s |" % ("Model",
                                                     "Valid Acc",
                                                     "Test Acc",
                                                     "TestOnly Acc"))
    print("|" + "-" * 16 + "|" + "-" * 19 + "|" + "-" * 19 + "|" + "-" * 19 + "|")
    for model in MODEL_TYPES:
        valid_accs, test_accs, testonly_accs = [], [], []
        for seed in range(1, 1 + num_seeds):
            logfile = os.path.join(target_dir, "%s_seed%i.txt" % (model.lower(), seed))
            test_logfile = os.path.join(target_dir, "%s_seed%i-testonly.txt" % (model.lower(), seed))
            with open(logfile, "w") as log_fh:
                subprocess.check_call(["python",
                                       "train.py",
                                       "--quiet",
                                       "--run-test",
                                       model,
                                       "VarMisuse",
                                       "--model-param-overrides",
                                       "{\"random_seed\": %i}" % seed,
                                       ],
                                      stdout=log_fh,
                                      stderr=log_fh)
            model_file = None
            with open(logfile, "r") as log_fh:
                for line in log_fh.readlines():
                    valid_res_match = VALID_RES_RE.search(line)
                    test_res_match = TEST_RES_RE.search(line)
                    model_file_match = MODEL_FILE_RE.search(line)
                    if valid_res_match is not None:
                        valid_accs.append(float(valid_res_match.groups()[0]))
                    elif test_res_match is not None:
                        test_accs.append(float(test_res_match.groups()[0]))
                    elif model_file_match is not None:
                        model_file = model_file_match.groups()[0]
            # Run TestOnly
            assert model_file is not None, "Could not find saved model file"
            with open(test_logfile, "w") as log_fh:
                subprocess.check_call(["python",
                                       "test.py",
                                       "--quiet",
                                       model_file,
                                       "data/varmisuse/graphs-testonly",
                                       ],
                                      stdout=log_fh,
                                      stderr=log_fh)
            with open(test_logfile, "r") as log_fh:
                for line in log_fh.readlines():
                    test_res_match = TEST_RES_RE.search(line)
                    if test_res_match is not None:
                        testonly_accs.append(float(test_res_match.groups()[0]))

        print("| %- 14s | %.3f (+/- %.3f) | %.3f (+/- %.3f) | %.3f (+/- %.3f) |"
              % (model,
                 np.mean(valid_accs),
                 np.std(valid_accs),
                 np.mean(test_accs),
                 np.std(test_accs),
                 np.mean(testonly_accs),
                 np.std(testonly_accs),
                 ))


if __name__ == "__main__":
    args = docopt(__doc__)
...
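Here logfile and test_logfile are plain text files that the train.py and test.py subprocesses write to; the summary table is then filled by scanning them with the three regular expressions defined at the top. A small sketch of that parsing step in isolation, using a hypothetical log line (the exact wording emitted by the training and test scripts is assumed, not shown in this snippet):

import re

TEST_RES_RE = re.compile(r'^Metrics: Accuracy: (0.\d+)')

# Hypothetical line of the kind the script expects to find in test_logfile.
sample_line = "Metrics: Accuracy: 0.832"
match = TEST_RES_RE.search(sample_line)
if match is not None:
    print("Parsed test accuracy:", float(match.groups()[0]))  # prints 0.832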

unit_test_case.py

Source: unit_test_case.py (GitHub)

import pytest
from logparser import apache_output
from logparser import generate_report

# testing the apache_output method from logparser.py file
def test_for_apache_output():
    test_logfile = '/Users/z004lc8/Desktop/Apache_Log_Parser_Using_Python/test_input.log'
    testing_report_data = [{'ip_host': '45.138.145.131', 'log_status': '200', 'url': '"-"'}]
    actual_report_data = []
    with open(test_logfile) as f:
        for line in f:
            line_dict = apache_output(line)
            actual_report_data.append(line_dict)
    assert actual_report_data == testing_report_data, "test_passed"

# testing the generate_report method from logparser.py file
def test_for_generate_report():
    test_logfile = '/Users/z004lc8/Desktop/Apache_Log_Parser_Using_Python/test_input.log'
    testing_result = [[], 100.0, 0.0, [], []]
    actual_report_data = []
    with open(test_logfile) as f:
        for line in f:
            line_dict = apache_output(line)
            actual_report_data.append(line_dict)
    actual_result = generate_report(actual_report_data)
    assert actual_result == testing_result
...
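Both tests above hard-code an absolute test_logfile path, so they only run on the author's machine. A hedged sketch of the same idea using pytest's tmp_path fixture instead, with a hypothetical sample log line (the real contents of test_input.log are not shown in the snippet, and apache_output's exact output format is assumed only as far as the ip_host key already used above):

import pytest
from logparser import apache_output

# Hypothetical Apache access-log line standing in for the real test_input.log contents.
SAMPLE_LINE = '45.138.145.131 - - [12/Dec/2021:10:15:32 +0000] "GET / HTTP/1.1" 200 512 "-"\n'

@pytest.fixture
def test_logfile(tmp_path):
    # Write the sample line to a temporary file so the test no longer depends on an absolute path.
    logfile = tmp_path / "test_input.log"
    logfile.write_text(SAMPLE_LINE)
    return str(logfile)

def test_for_apache_output_with_fixture(test_logfile):
    with open(test_logfile) as f:
        actual_report_data = [apache_output(line) for line in f]
    # One parsed dict per line, same shape as testing_report_data in the original test;
    # this assertion assumes apache_output extracts the leading IP as shown above.
    assert actual_report_data[0]['ip_host'] == '45.138.145.131'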

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, i.e. Selenium, Cypress, TestNG, etc.

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run avocado automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.
