Best Python code snippet using tempest_python
q_solution.py
Source:q_solution.py  
import numpy as np
import time


class AgentQ:
    def __init__(self, env, policy="epsilon_greedy", epsilon=0.05, alpha=0.1, gamma=1):
        self.env = env
        self.q_table = np.zeros(shape=(self.env.num_states, self.env.num_actions))
        self.policy = policy
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma

    def choose_action(self):
        if self.policy == "epsilon_greedy" and np.random.uniform(0, 1) < self.epsilon:
            action = np.random.randint(0, self.env.num_actions)
        else:
            state = self.env.agent_position
            q_values_of_state = self.q_table[state, :]
            # Choose randomly AMONG maximum Q-values
            max_q_value = np.max(q_values_of_state)
            maximum_q_values = np.nonzero(q_values_of_state == max_q_value)[0]
            action = np.random.choice(maximum_q_values)
        return action

    def learn(self, old_state, reward, new_state, action):
        max_q_value_in_new_state = np.max(self.q_table[new_state, :])
        current_q_value = self.q_table[old_state, action]
        self.q_table[old_state, action] = (1 - self.alpha) * current_q_value + self.alpha * (reward + self.gamma * max_q_value_in_new_state)


def q_learning(env, agent, num_episodes=500, max_steps_per_episode=1000, learn=True, seconds_between_each_step=0,
               show_grid=False, show_policy=False, show_q_values=False, show_softmax=False, show_learning_curve=False,
               fig_size=6):
    reward_per_episode = np.zeros(num_episodes)
    for episode in range(0, num_episodes):
        cumulative_reward = 0
        step = 0
        game_over = False
        while step < max_steps_per_episode and not game_over:
            time.sleep(seconds_between_each_step)
            if show_grid or show_learning_curve:
                env.visualize(show_grid=show_grid, show_policy=show_policy,
                              show_learning_curve=show_learning_curve,
                              show_q_values=show_q_values, clear_the_output=True,
                              episode=episode, reward_per_episode=reward_per_episode,
                              agent_q_table=agent.q_table, fig_size=fig_size)
            old_state = env.agent_position
            action = agent.choose_action()
            reward, new_state = env.make_step(action)
            if learn:
                agent.learn(old_state, reward, new_state, action)
            cumulative_reward += reward
            step += 1
            # Check whether agent is at terminal state. If yes: end episode; reset agent.
            if env.is_terminal_state():
                time.sleep(seconds_between_each_step)
                if show_grid or show_learning_curve:
                    env.visualize(show_grid=show_grid, show_policy=show_policy,
                                  show_learning_curve=show_learning_curve,
                                  show_q_values=show_q_values, clear_the_output=True,
                                  episode=episode, reward_per_episode=reward_per_episode,
                                  agent_q_table=agent.q_table, fig_size=fig_size)
                env.reset()
                game_over = True
        reward_per_episode[episode] = cumulative_reward
...
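The snippet above expects an environment object that exposes num_states, num_actions, agent_position, make_step(), is_terminal_state() and reset(). Below is a minimal sketch of how AgentQ and q_learning might be driven; the CorridorEnv class is a hypothetical stand-in written only for illustration, and the import assumes q_solution.py is on the Python path.

import numpy as np
from q_solution import AgentQ, q_learning   # assumes q_solution.py is importable

# Hypothetical 1-D corridor of 5 states: the agent starts at state 0 and
# receives +1 for reaching the rightmost state, with a small step penalty.
# It implements only the attributes/methods that AgentQ and q_learning use.
class CorridorEnv:
    def __init__(self, length=5):
        self.num_states = length
        self.num_actions = 2            # 0 = left, 1 = right
        self.agent_position = 0

    def make_step(self, action):
        if action == 1:
            self.agent_position = min(self.agent_position + 1, self.num_states - 1)
        else:
            self.agent_position = max(self.agent_position - 1, 0)
        reward = 1 if self.is_terminal_state() else -0.01
        return reward, self.agent_position

    def is_terminal_state(self):
        return self.agent_position == self.num_states - 1

    def reset(self):
        self.agent_position = 0

env = CorridorEnv()
agent = AgentQ(env, epsilon=0.1, alpha=0.5, gamma=0.99)
q_learning(env, agent, num_episodes=50, max_steps_per_episode=100)
print(np.round(agent.q_table, 2))       # learned Q-values, one row per state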
game_log_viewer.py
Source:game_log_viewer.py
...
			for i in range(int(prisoner_plane[0][0])):
				game.place_piece( MiniShogi.Piece(pieceType,   None, False, player) )
	window.draw_board(game)
	return game

def show_policy(window, policy, game, player):
	legal_moves = game.all_legal_moves(player)
	move_list = []
	for m in legal_moves:
		move_prob = policy[AlphaMiniShogiSearchTree.get_output_index( m, player )]
		move_list.append( (move_prob, m) )
	move_list.sort(reverse=True, key=lambda m: m[0])
	clear_moves = True
	for m in move_list:
		window.draw_move(m[1], clear_moves, m[0])
		print(m)
		clear_moves = False

def show_game_log(window, game_log, index):
	game_log_x = np.moveaxis(game_log['x'][index], -1, 0)
	print("Reward: ", game_log['y'][1][index])
	game = resort_game(window, game_log_x.tolist())
	# policy, reward = AlphaMiniShogiSearchTree(game, best_net_so_far).predict()
	# print("Model Reward: ", reward)
	# print("Player 0 moves:")
	# show_policy(window, game_log['y'][0][index], game, 0)
	# print("Player 0 net moves:")
	# show_policy(window, policy, game, 0)

	# print("Player 1 moves:")
	show_policy(window, game_log['y'][0][index], game, 1)
	# print("Player 1 net moves:")
	# show_policy(window, policy, game, 1)


best_net_so_far = AlphaGoZeroModel(
		input_board_size=MiniShogi.SIZE,
		number_of_input_planes=6*2*2+4*2,
		policy_output_size=MiniShogi.SIZE*(MiniShogi.SIZE+1)*(MiniShogi.SIZE*MiniShogi.SIZE+6),
		number_of_filters=64,
		number_of_residual_block=20,
		value_head_hidden_layer_size=64
	).init_model()
#net_files = glob.glob(f'model_minishogi_*')
#if net_files:
#	lastest_model_file = max(net_files)
#	print(f"Lastest net: {lastest_model_file}")
...
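show_game_log converts a channels-last log entry to planes-first with np.moveaxis before rebuilding the position. Here is a small self-contained sketch of that axis move; the 5x5 board size and the 32 input planes are assumptions taken from the model configuration above (MiniShogi.SIZE and 6*2*2 + 4*2).

import numpy as np

# Hypothetical stand-in for one game_log['x'] entry:
# board stored channels-last as (rows, cols, planes).
entry = np.zeros((5, 5, 6 * 2 * 2 + 4 * 2))     # shape (5, 5, 32)
planes_first = np.moveaxis(entry, -1, 0)        # shape (32, 5, 5)
print(entry.shape, "->", planes_first.shape)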
policy_show.py
Source:policy_show.py
...
|   -s,--secured=MODE      HTTPS mode "self" or "CA" [OPTIONAL].
|   -v,--verbose           verbose mode[OPTIONAL].
* outputs:
    * Status of the AG policies
.. function:: policy_show.show_policy(session)
    * Display the status of the AG policies.
        Example usage of the method::
            ret = policy_show.show_policy(session)
            print (ret)
        Details::
            policy_obj = policy()
            result = policy_obj.get(session)
        * inputs:
            :param session: session returned by login.
        * outputs:
            :rtype: dictionary of return status matching rest response
        *use cases*
        1. Retrieve the AG policy information.
"""
import sys
from pyfos import pyfos_auth
from pyfos import pyfos_util
from pyfos.utils import brcd_util
from pyfos.pyfos_brocade_access_gateway import policy


def show_policy(session):
    policy_obj = policy()
    # pyfos_util.response_print(policy_obj)
    result = policy_obj.get(session)
    return result


def main(argv):
    # Print arguments
    # print(sys.argv[1:])
    filters = []
    inputs = brcd_util.parse(argv, policy, filters)
    session = brcd_util.getsession(inputs)
    # pyfos_util.response_print(inputs['utilobject'].displaycustomcli())
    result = show_policy(inputs['session'])
    pyfos_util.response_print(result)
    pyfos_auth.logout(session)


if __name__ == "__main__":
...
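Besides running the script through the brcd_util command-line wrapper, show_policy() can be called directly once a session exists. The sketch below is illustrative only: the address and credentials are placeholders, the import assumes policy_show.py is on the Python path, and the pyfos_auth.login() argument order is an assumption to check against the installed PyFOS release.

from pyfos import pyfos_auth
from pyfos import pyfos_util
from policy_show import show_policy        # assumes policy_show.py is importable

# Placeholder credentials and switch address; login signature assumed as
# pyfos_auth.login(user, password, ip, https_mode) -- verify for your PyFOS version.
session = pyfos_auth.login("admin", "password", "10.0.0.1", None)
if pyfos_auth.is_failed_login(session):
    print("Login failed:", session)
else:
    result = show_policy(session)          # dictionary matching the REST response
    pyfos_util.response_print(result)
    pyfos_auth.logout(session)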
