How to use show_policy method in tempest

Best Python code snippet using tempest_python

q_solution.py

Source:q_solution.py Github

copy

Full Screen

import numpy as np
import time


class AgentQ:
    """Tabular Q-learning agent bound to an environment object.

    The environment must expose ``num_states``, ``num_actions`` and
    ``agent_position``; these are the only attributes this class reads.
    """

    def __init__(self, env, policy="epsilon_greedy", epsilon=0.05, alpha=0.1, gamma=1):
        """Store the hyper-parameters and allocate an all-zero Q-table.

        :param env: environment providing ``num_states`` / ``num_actions``.
        :param policy: exploration is only enabled for ``"epsilon_greedy"``.
        :param epsilon: probability threshold for taking a random action.
        :param alpha: learning rate used in :meth:`learn`.
        :param gamma: discount factor used in :meth:`learn`.
        """
        self.env = env
        self.policy = policy
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        # One row per state, one column per action.
        self.q_table = np.zeros(shape=(self.env.num_states, self.env.num_actions))

    def choose_action(self):
        """Return an action index for the environment's current agent position.

        With the ``"epsilon_greedy"`` policy a uniform random action is taken
        when a fresh draw from [0, 1) falls below ``epsilon``; in every other
        case the action is drawn uniformly among the highest-valued actions.
        """
        # The random draw only happens for the epsilon-greedy policy
        # (short-circuit), which keeps the RNG stream untouched otherwise.
        explore = self.policy == "epsilon_greedy" and np.random.uniform(0, 1) < self.epsilon
        if explore:
            return np.random.randint(0, self.env.num_actions)

        row = self.q_table[self.env.agent_position, :]
        # Break ties randomly AMONG the maximum Q-values.
        best_actions = np.nonzero(row == np.max(row))[0]
        return np.random.choice(best_actions)

    def learn(self, old_state, reward, new_state, action):
        """Apply one Q-learning update to ``q_table[old_state, action]``.

        The new value blends the current estimate with the target
        ``reward + gamma * max(q_table[new_state])`` using weight ``alpha``.
        """
        target = reward + self.gamma * np.max(self.q_table[new_state, :])
        previous = self.q_table[old_state, action]
        self.q_table[old_state, action] = (1 - self.alpha) * previous + self.alpha * target


def q_learning(env, agent, num_episodes=500, max_steps_per_episode=1000, learn=True, seconds_between_each_step=0,
               show_grid=False, show_policy=False, show_q_values=False, show_softmax=False, show_learning_curve=False,
               fig_size=6):
    """Run ``agent`` in ``env`` for ``num_episodes`` episodes.

    Each episode lasts until the environment reports a terminal state or
    ``max_steps_per_episode`` steps have been taken.  When ``learn`` is true
    the agent's Q-table is updated after every step.  The environment is
    visualized before each step (and once more on reaching a terminal state)
    if ``show_grid`` or ``show_learning_curve`` is set.

    ``show_softmax`` is accepted but not used in the visible part of the
    function.  NOTE(review): the listing is cut off after the per-episode
    reward is stored, so whatever follows (e.g. a return value) is not shown.
    """
    reward_per_episode = np.zeros(num_episodes)

    def pause_and_draw(episode):
        # Shared by the per-step and the terminal-state rendering paths.
        time.sleep(seconds_between_each_step)
        if not (show_grid or show_learning_curve):
            return
        env.visualize(show_grid=show_grid, show_policy=show_policy,
                      show_learning_curve=show_learning_curve,
                      show_q_values=show_q_values, clear_the_output=True,
                      episode=episode, reward_per_episode=reward_per_episode,
                      agent_q_table=agent.q_table, fig_size=fig_size)

    for episode in range(0, num_episodes):
        cumulative_reward = 0
        step = 0
        game_over = False
        while step < max_steps_per_episode and not game_over:
            pause_and_draw(episode)
            old_state = env.agent_position
            action = agent.choose_action()
            reward, new_state = env.make_step(action)
            if learn:
                agent.learn(old_state, reward, new_state, action)
            cumulative_reward += reward
            step += 1
            # Terminal state reached: show the final frame, reset the
            # environment and end the episode.
            if env.is_terminal_state():
                pause_and_draw(episode)
                env.reset()
                game_over = True
        # NOTE(review): nesting reconstructed from a flattened listing —
        # assumed to run once per episode, after the step loop.
        reward_per_episode[episode] = cumulative_reward
    # ... (listing truncated here in the source)

Full Screen

Full Screen

game_log_viewer.py

Source:game_log_viewer.py Github

copy

Full Screen

...38 for i in range(int(prisoner_plane[0][0])):39 game.place_piece( MiniShogi.Piece(pieceType, None, False, player) )40 window.draw_board(game)41 return game42def show_policy(window, policy, game, player):43 legal_moves = game.all_legal_moves(player)44 move_list = []45 for m in legal_moves:46 move_prob = policy[AlphaMiniShogiSearchTree.get_output_index( m, player )]47 move_list.append( (move_prob, m) )48 move_list.sort(reverse=True, key=lambda m:m[0])49 clear_moves = True50 for m in move_list:51 window.draw_move(m[1], clear_moves, m[0])52 print(m)53 clear_moves = False54def show_game_log(window, game_log, index):55 game_log_x = np.moveaxis(game_log['x'][index], -1, 0)56 print("Reward: ", game_log['y'][1][index])57 game = resort_game(window, game_log_x.tolist())58 # policy, reward = AlphaMiniShogiSearchTree(game, best_net_so_far).predict()59 # print("Model Reward: ", reward)60 # print("Player 0 moves:")61 # show_policy(window, game_log['y'][0][index], game, 0)62 # print("Player 0 net moves:")63 # show_policy(window, policy, game, 0)64 65 # print("Player 1 moves:")66 show_policy(window, game_log['y'][0][index], game, 1)67 # print("Player 1 net moves:")68 # show_policy(window, policy, game, 1)69 70 71best_net_so_far = AlphaGoZeroModel(72 input_board_size=MiniShogi.SIZE,73 number_of_input_planes=6*2*2+4*2,74 policy_output_size=MiniShogi.SIZE*(MiniShogi.SIZE+1)*(MiniShogi.SIZE*MiniShogi.SIZE+6),75 number_of_filters=64,76 number_of_residual_block=20,77 value_head_hidden_layer_size=6478 ).init_model()79#net_files = glob.glob(f'model_minishogi_*')80#if net_files:81# lastest_model_file = max(net_files)82# print(f"Lastest net: {lastest_model_file}")...

Full Screen

Full Screen

policy_show.py

Source:policy_show.py Github

copy

Full Screen

...25| -s,--secured=MODE HTTPS mode "self" or "CA" [OPTIONAL].26| -v,--verbose verbose mode[OPTIONAL].27* outputs:28 * Status of the AG policies29.. function:: policy_show.show_policy(session)30 * Display the status of the AG policies.31 Example usage of the method::32 ret = policy_show.show_policy(session)33 print (ret)34 Details::35 policy_obj = policy()36 result = policy_obj.get(session)37 * inputs:38 :param session: session returned by login.39 * outputs:40 :rtype: dictionary of return status matching rest response41 *use cases*42 1. Retrieve the AG policy information.43"""44import sys45from pyfos import pyfos_auth46from pyfos import pyfos_util47from pyfos.utils import brcd_util48from pyfos.pyfos_brocade_access_gateway import policy49def show_policy(session):50 policy_obj = policy()51 # pyfos_util.response_print(policy_obj)52 result = policy_obj.get(session)53 return result54def main(argv):55 # Print arguments56 # print(sys.argv[1:])57 filters = []58 inputs = brcd_util.parse(argv, policy, filters)59 session = brcd_util.getsession(inputs)60 # pyfos_util.response_print(inputs['utilobject'].displaycustomcli())61 result = show_policy(inputs['session'])62 pyfos_util.response_print(result)63 pyfos_auth.logout(session)64if __name__ == "__main__":...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run tempest automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful