How to use the on_step_end method in Lemoncheesecake

Best Python code snippets using lemoncheesecake

callbacks.py

Source: callbacks.py (GitHub)


...
    def on_step_begin(self, step, logs={}):
        pass

    def on_step_end(self, step, logs={}):
        pass

    def on_action_begin(self, action, logs={}):
        pass

    def on_action_end(self, action, logs={}):
        pass


class CallbackList(object):
    def __init__(self, callbacks=None):
        callbacks = callbacks or []
        self.callbacks = [c for c in callbacks]

    def append(self, callback):
        self.callbacks.append(callback)

    def set_params(self, params):
        for callback in self.callbacks:
            callback.set_params(params)

    def set_model(self, model):
        for callback in self.callbacks:
            callback.set_model(model)

    def set_env(self, env):
        for callback in self.callbacks:
            callback.set_env(env)

    def on_test_begin(self, logs={}):
        for callback in self.callbacks:
            callback.on_test_begin(logs)

    def on_test_end(self, logs={}):
        for callback in self.callbacks:
            callback.on_test_end(logs)

    def on_train_begin(self, logs={}):
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs={}):
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_episode_begin(self, episode, logs={}):
        for callback in self.callbacks:
            callback.on_episode_begin(episode, logs=logs)

    def on_episode_end(self, episode, logs={}):
        for callback in self.callbacks:
            callback.on_episode_end(episode, logs=logs)

    def on_step_begin(self, step, logs={}):
        for callback in self.callbacks:
            callback.on_step_begin(step, logs=logs)

    def on_step_end(self, step, logs={}):
        for callback in self.callbacks:
            callback.on_step_end(step, logs=logs)

    def on_action_begin(self, action, logs={}):
        for callback in self.callbacks:
            callback.on_action_begin(action, logs=logs)

    def on_action_end(self, action, logs={}):
        for callback in self.callbacks:
            callback.on_action_end(action, logs=logs)


# ==================================================

# Compute running averages for intervals of metrics
class IntervalMetrics(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)
        self.all_metrics = {}
        self.interval_metrics = {'step': []}

    def update(self, logs):
        self.interval_metrics['step'].append(self.step)
        for name, values in self.all_metrics.items():
            self.interval_metrics[name].append(np.nanmean(values[-self.interval:]))
        logs['interval_metrics'] = self.interval_metrics
        logs['all_metrics'] = self.all_metrics

    def on_train_begin(self, logs):
        self.metrics_names = self.model.metrics_names
        for name in self.metrics_names:
            self.all_metrics.setdefault(name, [])
            self.interval_metrics.setdefault(name, [])

    def on_train_end(self, logs):
        self.update(logs)

    def on_step_end(self, step, logs):
        for name, value in zip(self.metrics_names, logs['metrics']):
            self.all_metrics[name].append(value)
        self.step += 1
        if self.step % self.interval == 0:
            self.update(logs)


# ==================================================

# Collect episode rewards
class CollectRewards(Callback):
    def __init__(self, config):
        self.step = 0
        self.rewards = {'episode': [], 'step': [], 'reward': []}

    def update(self, logs):
        logs['all_episode_rewards'] = self.rewards

    def on_train_end(self, logs):
        self.update(logs)

    def on_episode_end(self, episode, logs):
        self.rewards['episode'].append(episode)
        self.rewards['step'].append(self.step)
        self.rewards['reward'].append(logs['episode_reward'])
        self.update(logs)

    def on_step_end(self, step, logs):
        self.step += 1
        self.update(logs)


# Collect episode rewards in intervals
class IntervalRewards(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)

    def reset(self):
        self.interval_rewards = []

    def update(self, logs):
        # Compute mean, min, and max of episode rewards
        logs['interval_rewards'] = {'nb_episodes': len(self.interval_rewards)}
        if len(self.interval_rewards) > 0:
            logs['interval_rewards']['mean'] = np.mean(self.interval_rewards)
            logs['interval_rewards']['min'] = np.min(self.interval_rewards)
            logs['interval_rewards']['max'] = np.max(self.interval_rewards)

    def on_train_begin(self, logs):
        self.reset()

    def on_train_end(self, logs):
        self.update(logs)

    def on_episode_end(self, episode, logs):
        self.interval_rewards.append(logs['episode_reward'])

    def on_step_end(self, step, logs):
        self.step += 1
        if self.step % self.interval == 0:
            self.update(logs)
            self.reset()


# ==================================================

class IntervalProgress(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)

    def __getstate__(self):
        state = super(IntervalProgress, self).__getstate__()
        # if 'progbar' in state: del state['progbar']
        return state

    def reset(self):
        self.interval_start = timeit.default_timer()
        # self.progbar = Progbar(target=self.interval)
        print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def update(self, logs):
        formatted_metrics = ''
        if 'interval_metrics' in logs:
            for name, values in logs['interval_metrics'].items():
                if name == 'step': continue

                if type(values[-1]) is not float:
                    formatted_metrics += ' - {}: {}'.format(name, values[-1])
                else:
                    formatted_metrics += ' - {}: {:.4f}'.format(name, values[-1])
        formatted_rewards = ''
        if 'interval_rewards' in logs:
            eps = logs['interval_rewards']['nb_episodes']
            if eps > 0:
                formatted_rewards = ' - episode_rewards: {:.3f} [{:.3f}, {:.3f}]'.format(
                    logs['interval_rewards']['mean'],
                    logs['interval_rewards']['min'],
                    logs['interval_rewards']['max'])
        else:
            eps = 0
        print('{} episodes{}{}'.format(eps, formatted_rewards, formatted_metrics))

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        print('Training for {} steps ...'.format(self.params['nb_steps']))
        self.reset()

    def on_train_end(self, logs):
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))
        self.update(logs)

    def on_step_end(self, step, logs):
        values = [('reward', logs['reward'])]
        # self.progbar.update((self.step % self.interval) + 1, values=values, force=True)
        self.step += 1
        if self.step % self.interval == 0:
            self.update(logs)
            self.reset()


# ==================================================

class IntervalTest(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)
        self.testcount = config.getint('TestCount', 100)
        self.testlength = config.getint('TestLength', 200)

    def set_env(self, env):
        # Make a new copy of the environment
        self.env = env.env  # .clone()

    def test(self, logs):
        total = 0.
        rewards = []
        for _ in range(self.testcount):
            err = 0.
            reward = 0
            s = self.env.reset()
            for i in range(self.testlength):
                # if isinstance(self.model, KQLearningAgentIID):
                #     a, _ = self.model.act(s, stochastic=False)
                # else:
                a = self.model.act(s, stochastic=False)
                s_, r, done, _ = self.env.step(a)
                if done: s_ = None
                err += 0.5 * self.model.bellman_error(s, a, r, s_) ** 2
                s = s_
                reward += r
                if done: break
            rewards.append(reward)

            total += err / float(i + 1)
        loss = float(total) / float(self.testcount) + self.model.model_error()
        logs.setdefault('interval_metrics', {}).setdefault('Testing Loss', []).append(loss)
        logs.setdefault('interval_metrics', {}).setdefault('Testing Reward', []).append(np.mean(rewards))
        logs.setdefault('interval_metrics', {}).setdefault('Testing Reward Std', []).append(np.std(rewards))

    def on_step_end(self, step, logs):
        self.step += 1
        if self.step % self.interval == 0:
            self.test(logs)


class IntervalACCTest(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)
        self.testpoints = config.getint('TestPoints', 2000)

    def set_env(self, env):
        # sample from environment state/actions
        self.samples = []
        s = env.env.reset()
        for _ in range(self.testpoints):
            s, a = env.env.observation_space.sample(), env.env.action_space.sample()
            env.env.state = s
            s_, r, _, _ = env.env.step(a)
            self.samples.append((s, a, r, s_))

    def test(self, logs):
        err = 0.
        total_reward = 0
        for s, a, r, s_ in self.samples:
            err += 0.5 * self.model.bellman_error(s, a, r, s_) ** 2
            # total_reward += r

        loss = float(err) / float(len(self.samples))
        logs.setdefault('interval_metrics', {}).setdefault('ACC Testing Loss', []).append(loss)
        logs.setdefault('interval_metrics', {}).setdefault('ACC Regularized Testing Loss', []).append(
            self.model.model.Q.normsq())

    def on_step_end(self, step, logs):
        self.step += 1
        if self.step % self.interval == 0:
            self.test(logs)


class IntervalMCTest(Callback):
    def __init__(self, config):
        self.x = []
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)

        self.teststatecount = config.getint('TestStateCount', 100)  # count of misc states
        self.testtrajlength = config.getint('TestTrajLength', 1000)  # count of trajectory length

        self.sarsa_steps = config.getint('SARSASteps', 100000)  # used by on_step_end below

    def set_env(self, env):
        # Make a new copy of the environment
        self.env = env.clone()

    def test(self, logs):
        # print(np.shape(self.x))
        if (not np.asarray(self.x).size == 0):
            perror = np.mean(np.abs(self.model.model.Q(self.x).flatten() - self.testValues) / np.abs(self.testValues))
        else:
            perror = 0
        logs.setdefault('interval_metrics', {}).setdefault('Testing Loss', []).append(perror)

    # Function to make one rollout
    def rollout(self, N=None, s=None):
        if s is None:
            s = self.env.reset()
        else:
            self.env.state = s
        tr = []
        while (N is None) or (len(tr) < N):
            if self.step == 1:
                stoch = True
            else:
                stoch = False

            a = self.model.act(s, stochastic=stoch)  # policy.select(s)
            # print (a)
            s_, r, done, _ = self.env.step(a)
            if done:
                tr.append((s, a, r, None, None))
                return tr
            a_ = self.model.act(s_, stochastic=stoch)  # policy.select(s_)
            tr.append((s, a, r, s_, a_))
            s = s_
        return tr

    # Function to make a trajectory (that could have multiple episodes)
    def make_trajectory(self, N):
        traj = []
        while len(traj) < N:
            # print (len(traj))
            traj.extend(self.rollout(N - len(traj)))
        return traj

    def mc_rollout(self):
        # Generate misc trajectory
        testTrajectory = self.make_trajectory(self.testtrajlength)
        # Select misc points
        samples = random.sample(testTrajectory, self.teststatecount)
        self.testStates = [tup[0] for tup in samples]
        self.testActions = [tup[1] for tup in samples]

        self.testActions = np.reshape(self.testActions, (-1, 1))

        self.x = np.concatenate((self.testStates, self.testActions), axis=1)
        # Evaluate the rollouts from the misc states
        self.testValues = []
        for i, s0 in enumerate(self.testStates):
            # print(i)
            # Perform many rollouts from each misc state to get average returns
            R0 = 0.
            for k in range(self.testtrajlength):
                # Get the list of rewards
                Rs = [tup[2] for tup in self.rollout(2000, s0)]
                # Accumulate
                R = reduce(lambda R_, R: R + self.model.gamma * R_, Rs, 0.)
                # Average
                R0 += (R - R0) / (k + 1)
            # Save this value
            self.testValues.append(R0)
            # if (i + 1) % 100 == 0:
            #     print('Computing misc point {}/{}'.format(i + 1, self.teststatecount))

    def on_step_end(self, step, logs):
        self.step += 1
        if self.step % self.interval == 1:
            self.test(logs)
        if self.step % self.sarsa_steps == 1:
            self.mc_rollout()


# ==================================================

class PlotMetrics(Callback):
    def __init__(self, config):
        self.prefix = config.name + ' - ' if config.name != 'DEFAULT' else ''

    def on_step_end(self, step, logs):
        if ('interval_metrics' in logs) and (len(logs['interval_metrics']['step']) > 1):
            for name, values in logs['interval_metrics'].items():
                if name == 'step': continue
                plt.figure(self.prefix + name)
                plt.clf()
                plt.plot(logs['interval_metrics']['step'], values)
                plt.title(self.prefix + name)
                plt.xlabel('Steps')
                plt.ylabel(name)
                plt.draw_all()
                plt.pause(1e-3)


class PlotRewards(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)
        self.prefix = config.name + ' - ' if config.name != 'DEFAULT' else ''

    def update(self, logs):
        if ('all_episode_rewards' in logs) and (len(logs['all_episode_rewards']['reward']) > 1):
            plt.figure(self.prefix + 'Cumulative Rewards')
            plt.clf()
            plt.plot(logs['all_episode_rewards']['episode'], logs['all_episode_rewards']['reward'])
            plt.xlabel('Episode')
            plt.ylabel('Cumulative Reward')
            plt.draw_all()
            plt.pause(1e-3)

    def on_step_end(self, step, logs):
        self.step += 1
        if self.step % self.interval == 0:
            self.update(logs)


class PlotValueFunction(Callback):
    def __init__(self, config):
        self.step = 0
        self.interval = config.getint('ReportInterval', 10000)
        self.prefix = config.name + ' - ' if config.name != 'DEFAULT' else ''

    def update(self, logs):
        pass

    def on_train_begin(self, logs):
        self.bounds = self.env.bounds

    def on_step_end(self, step, logs):
        self.step += 1
        if self.step % self.interval == 0:
            self.update(logs)


# ==================================================
def make_callbacks(config):
    callbacks = CallbackList()
    for cls in Callback.__subclasses__():
        if config.getboolean(cls.__name__, False):
            callbacks.append(cls(config))
...
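The snippet above shows the dispatch side of the hook: CallbackList.on_step_end simply forwards the step index and the shared logs dict to every registered callback, and each callback decides what to do with it (count steps, collect metrics, plot, run tests). Below is a minimal sketch of the other side, wiring a custom on_step_end into that CallbackList. The Callback stand-in, the StepLogger class, and the toy loop are illustrative assumptions, not part of the original source; only CallbackList comes from the snippet above.

class Callback(object):
    # Stand-in with the same hook names as the (truncated) base class above.
    def set_params(self, params): self.params = params
    def set_model(self, model): self.model = model
    def set_env(self, env): self.env = env
    def on_train_begin(self, logs={}): pass
    def on_train_end(self, logs={}): pass
    def on_step_begin(self, step, logs={}): pass
    def on_step_end(self, step, logs={}): pass


class StepLogger(Callback):
    # Hypothetical callback: count steps and print a line every `interval` steps.
    def __init__(self, interval=1000):
        self.interval = interval
        self.steps = 0

    def on_step_end(self, step, logs={}):
        self.steps += 1
        if self.steps % self.interval == 0:
            print('{} steps done, last reward: {}'.format(self.steps, logs.get('reward')))


# The trainer owns a CallbackList (from the snippet above) and fires the
# hooks around every environment step.
callbacks = CallbackList([StepLogger(interval=2)])
callbacks.on_train_begin()
for step in range(4):
    callbacks.on_step_begin(step)
    logs = {'reward': 1.0}  # whatever the step produced
    callbacks.on_step_end(step, logs)
callbacks.on_train_end()

Because the same logs dict is passed to every callback, one callback (such as IntervalMetrics above) can write results into it and a later callback (such as PlotMetrics) can read them back inside its own on_step_end.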


RR_ScoreKeepers.py

Source: RR_ScoreKeepers.py (GitHub)


...
    def on_step_begin(self):
        super(AbstractScoreKeeper, self).on_step_begin()
        self.reward_happy = 0.0
        self.reward_grumpy = 0.0

    def on_step_end(self):
        """ Adjust reward here. Override in inherited class. """
        super(AbstractScoreKeeper, self).on_step_end()

    """ ADD MORE EVENTS HERE, AS NEEDED. """
    def on_robot_collision(self, bot1: Robot, bot2: Robot):
        """ Adjust reward here. Override in inherited class. """
        super(AbstractScoreKeeper, self).on_robot_collision(bot1, bot2)


class ChasePosBall(AbstractScoreKeeper):
    """
    Super basic. Just chase the freaking good balls. Don't care what you do with them.
    Mainly intended for 1-ball games, but could be useful long term with high enough gamma.
    """
    def on_step_end(self):
        super(ChasePosBall, self).on_step_end()
        for sprRobot in self.lstHappyBots:
            for sprBall in self.lstPosBalls:
                dist_now = distance(sprRobot.rectDbl.center, sprBall.rectDbl.center)
                dist_prior = distance(sprRobot.rectDblPriorStep.center, sprBall.rectDbl.center)
                self.reward_happy += (dist_prior - dist_now) * const.POINTS_ROBOT_TRAVEL_MULT
        for sprRobot in self.lstGrumpyBots:
            for sprBall in self.lstPosBalls:
                dist_now = distance(sprRobot.rectDbl.center, sprBall.rectDbl.center)
                dist_prior = distance(sprRobot.rectDblPriorStep.center, sprBall.rectDbl.center)
                self.reward_grumpy += (dist_prior - dist_now) * const.POINTS_ROBOT_TRAVEL_MULT


class DontDriveInGoals(AbstractScoreKeeper):
    """ If you drive in the goal, you get penalized. Thems the rules. """
    def on_step_end(self):
        super(DontDriveInGoals, self).on_step_end()
        for sprRobot in self.lstRobots:
            if TrashyPhysics.robot_in_goal(sprRobot, self.sprHappyGoal) or \
                    TrashyPhysics.robot_in_goal(sprRobot, self.sprGrumpyGoal):
                if sprRobot.intTeam == const.TEAM_HAPPY:
                    self.reward_happy -= const.POINTS_ROBOT_IN_GOAL_PENALTY
                else:
                    self.reward_grumpy -= const.POINTS_ROBOT_IN_GOAL_PENALTY


class KeepMovingGuys(AbstractScoreKeeper):
    """ Don't move? Get penalized. Lazy robots... """
    def on_step_end(self):
        super(KeepMovingGuys, self).on_step_end()
        for sprRobot in self.lstRobots:
            if sprRobot.rectDbl.center == sprRobot.rectDblPriorStep.center and \
                    sprRobot.rectDbl.rotation == sprRobot.rectDblPriorStep.rotation:
                if sprRobot.intTeam == const.TEAM_HAPPY:
                    self.reward_happy -= const.POINTS_NO_MOVE_PENALTY
                else:
                    self.reward_grumpy -= const.POINTS_NO_MOVE_PENALTY


class BaseDestruction(AbstractScoreKeeper):
    """ If a base is destroyed (3 neg balls) there's a LOT of points we need to dish out. """
    def on_step_end(self):
        super(BaseDestruction, self).on_step_end()
        if self.sprHappyGoal.is_destroyed():
            self.reward_happy += const.POINTS_GOAL_DESTROYED
            self.reward_grumpy -= const.POINTS_GOAL_DESTROYED
        elif self.sprGrumpyGoal.is_destroyed():
            self.reward_happy += const.POINTS_GOAL_DESTROYED
            self.reward_grumpy -= const.POINTS_GOAL_DESTROYED


class NaughtyBots(AbstractScoreKeeper):
    """ This isn't American bumper cars, mmk? No smashing. Wait your damn turn. """
    def __init__(self, lst_starting_config: List[Tuple[float, float]] = GameEnv.CONFIG_RANDOM):
        super(NaughtyBots, self).__init__(lst_starting_config)
        self.set_naughty_bots = set()  # type: Set[Robot]

    def on_step_begin(self):
        super(NaughtyBots, self).on_step_begin()
        self.set_naughty_bots.clear()

    def on_robot_collision(self, bot1: Robot, bot2: Robot):
        super(NaughtyBots, self).on_robot_collision(bot1, bot2)
        if bot1.lngLThrust != 0 or bot1.lngRThrust != 0:
            self.set_naughty_bots.add(bot1)
        if bot2.lngLThrust != 0 or bot2.lngRThrust != 0:
            self.set_naughty_bots.add(bot2)

    def on_step_end(self):
        for sprNaughtyBot in self.set_naughty_bots:
            if sprNaughtyBot.intTeam == const.TEAM_HAPPY:
                self.reward_happy -= const.POINTS_ROBOT_CRASH_PENALTY
            else:
                self.reward_grumpy -= const.POINTS_ROBOT_CRASH_PENALTY


class PushPosBallsToGoal(AbstractScoreKeeper):
    """ Positive points for pushing towards your goal. Neg points otherwise. 0-sum. """
    def __init__(self, lst_starting_config: List[Tuple[float, float]] = GameEnv.CONFIG_RANDOM):
        super(PushPosBallsToGoal, self).__init__(lst_starting_config)
        self.ball_dist_sum = 0.0

    def on_step_begin(self):
        super(PushPosBallsToGoal, self).on_step_begin()
        self.ball_dist_sum = self._calc_ball_dist_sum()

    def on_step_end(self):
        super(PushPosBallsToGoal, self).on_step_end()
        ball_dist_delta = self._calc_ball_dist_sum() - self.ball_dist_sum
        self.reward_happy += ball_dist_delta * const.POINTS_BALL_TRAVEL_MULT
        self.reward_grumpy -= ball_dist_delta * const.POINTS_BALL_TRAVEL_MULT

    def _calc_ball_dist_sum(self):
        # Grumpy's goal is in the 0,0 corner, therefore higher distance is better for Happy team
        return sum(map(lambda x: distance((0, 0), x.rectDbl.center), self.lstPosBalls))


class PushNegBallsFromGoal(AbstractScoreKeeper):
    """ Positive points for pushing towards enemy goal. Neg points otherwise. 0-sum. """
    def __init__(self, lst_starting_config: List[Tuple[float, float]] = GameEnv.CONFIG_RANDOM):
        super(PushNegBallsFromGoal, self).__init__(lst_starting_config)
        self.ball_dist_sum = 0.0

    def on_step_begin(self):
        super(PushNegBallsFromGoal, self).on_step_begin()
        self.ball_dist_sum = self._calc_ball_dist_sum()

    def on_step_end(self):
        super(PushNegBallsFromGoal, self).on_step_end()
        ball_dist_delta = self._calc_ball_dist_sum() - self.ball_dist_sum
        self.reward_happy -= ball_dist_delta * const.POINTS_BALL_TRAVEL_MULT
        self.reward_grumpy += ball_dist_delta * const.POINTS_BALL_TRAVEL_MULT

    def _calc_ball_dist_sum(self):
        # Grumpy's goal is in the 0,0 corner, therefore higher distance is better for Grumpy team
        return sum(map(lambda x: distance((0, 0), x.rectDbl.center), self.lstPosBalls))


class PushPosBallsInYourGoal(AbstractScoreKeeper):
    """ TODO THIS """


class PushNegBallsInTheirGoal(AbstractScoreKeeper):
...


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. From setting up the prerequisites and running your first automation test to following best practices and diving into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.

Run Lemoncheesecake automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest now!

Get 100 automation testing minutes free!

