How to use reset_steps method in Behave

Best Python code snippets using behave

cliff_dqn.py

Source: cliff_dqn.py (GitHub)


1"""2Trains a DQN to traverse the cliffwalking GridWorld.3"""4import random5import time6import copy7from random import Random8from copy import deepcopy9import keras10import tensorflow as tf11import datetime12#ANN imports13import numpy as np14from keras.models import Sequential15from keras.layers.core import Dense16class Grid():17 grid = []18 start = None19 goal = None20 location = [0,0]21 agent = None22 episodes = 023 current_episode = 024 def __init__(self, episodes):25 """26 Creates a 13 by 3 Grid and sets the number of episodes.27 """28 self.grid = [[-1] * 12,[-1] * 12,[-1] * 12,[-1, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -0]]29 self.start = self.grid[3][0]30 self.goal = self.grid[3][11]31 self.location[0] = 332 self.location[1] = 033 self.episodes = episodes34 self.actions = [[-1,0],[1,0],[0,-1],[0,1]]35 def __str__(self):36 """37 Prints the Gridworld and the agent's placement on the grid.38 """39 grid_string="____________\n"40 for x in range(len(self.grid)):41 row = ""42 if x < 3:43 for y in range(len(self.grid[x])):44 if x == self.location[0] and y == self.location[1]:45 row = str(row) + "|*"46 else:47 row = str(row) + "|X"48 else:49 for y in range(len(self.grid[x])):50 if x == self.location[0] and y == self.location[1]:51 row = row + "|*"52 elif y == 0:53 row = str(row) + "|S"54 elif y == 11:55 row = str(row) + "|G"56 else:57 row = str(row) + "|C"58 grid_string = grid_string + row + "|\n"59 return grid_string60 def set_agent(self, agent):61 """62 Set the agent to be used in the Gridworld63 """64 self.agent = agent65 def make_move(self):66 """67 Have the agent make a move and apply reward/punishments. Store state-action-reward in memory68 """69 original_location = copy.deepcopy(self.agent.location)70 #Select an action to take71 move = self.agent.make_move()72 #Execution action and observe reward73 self.location[0] = self.location[0] + move[0][0]74 self.location[1] = self.location[1] + move[0][1]75 self.agent.set_agent_location(self.location)76 new_location = copy.deepcopy(self.agent.location)77 reward = self.grid[self.location[0]][self.location[1]]78 move_index = 079 #get action index80 for x in range(0,4):81 if move[0] == self.actions[x]:82 move_index = x83 if reward == -100 or (self.location[0] == 3 and self.location[1] == 11):84 #Terminal State85 self.agent.remember_state_action(original_location, move_index, reward, new_location, True)86 self.agent.update_approximater()87 self.agent.reset_approximaters()88 self.finish_episode(reward)89 else:90 #Non-Terminal State91 self.agent.remember_state_action(original_location, move_index, reward, new_location, False)92 self.agent.update_score(reward)93 self.agent.update_approximater()94 self.agent.reset_approximaters()95 if self.agent.score <= self.agent.minimum_score:96 self.finish_episode(0) 97 def finish_episode(self, reward):98 """99 Finish the current episode. Print if goal state reached and print final score.100 """101 self.agent.update_score(reward)102 self.agent.set_agent_location(self.location)103 print(str(self))104 finish_string = str("Episode: " + str(self.current_episode+1) + ". Score: " + str(self.agent.score))105 if(self.agent.location[0] == 3 and self.agent.location[1] == 11):106 finish_string = str("Episode: " + str(self.current_episode+1) + ". 
Score: " + str(self.agent.score))107 finish_string = finish_string + str("\t\tCorrect location reached!")108 print(finish_string)109 print(str("Epsilon:\t") + str(self.agent.epsilon))110 print("\n\n\n\n\n\n\n\n\n")111 #time.sleep(1.75)112 self.agent.scores.append(self.agent.score)113 self.agent.score = 0114 self.current_episode = self.current_episode + 1115 self.agent.finish_episode()116 self.location = [3,0]117 self.agent.set_agent_location(self.location)118class q_approx():119 """120 [0,-1] 0121 [0,1] 1122 [-1,0] 2123 [1,0] 3124 """125 location = [None,None]126 epsilon = 0127 epsilon_decay = 0128 epsilon_minimum = 0.1129 rand = None130 discount = 0131 target_net = None132 current_net = None133 event_memory = None134 memory_size = 0135 sample_size = 0136 steps_taken = 0137 reset_steps = 0138 minimum_score = 0139 score = 0140 scores = []141 def __init__(self, starting_location, epsilon, discount, epsilon_decay=0.05, epsilon_minumum=0.01, memory_size=100, sample_size=32, reset_steps = 500, minimum_score = -100):142 self.location = [int(starting_location[0]), int(starting_location[1])]143 self.epsilon = epsilon144 self.epsilon_decay = epsilon_decay145 self.epsilon_minimum = epsilon_minumum146 self.rand = random.Random()147 self.discount = discount148 self.event_memory = []149 self.memory_size = memory_size150 self.sample_size = sample_size151 self.reset_steps = reset_steps152 self.minimum_score = minimum_score153 #Initialize action-value function Q with random weights154 self.current_net = Sequential()155 self.current_net.add(Dense(3, input_dim=2, activation='tanh'))156 self.current_net.add(Dense(4, activation='linear'))157 self.current_net.compile(loss='mean_squared_error',158 optimizer='adam',159 metrics=['accuracy'])160 #Initialize target action-value function Q161 self.target_net = deepcopy(self.current_net)162 def get_possible_actions(self, location=None):163 """164 Returns all valid moves from a location165 If location = None then the agent's current location is used.166 """167 if location == None:168 location = self.location169 possible_actions = []170 #vertical moves171 if location[0] != 0 and location[0] != 3:172 #Can move up or down173 action = [[-1,0], True]174 possible_actions.append(action)175 action = [[1,0], True]176 possible_actions.append(action)177 elif location[0] == 0:178 #Can only move down179 action = [[-1,0], False]180 possible_actions.append(action)181 action = [[1,0], True]182 possible_actions.append(action)183 elif location[0] == 3:184 #Can only move up185 action = [[-1,0], True]186 possible_actions.append(action)187 action = [[1,0], False]188 possible_actions.append(action)189 #horizontal moves190 if location[1] != 0 and location[1] != 11:191 #Can move left or right192 action = [[0,-1], True]193 possible_actions.append(action)194 action = [[0,1], True]195 possible_actions.append(action)196 elif location[1] == 0:197 #Can move only right198 action = [[0,-1], False]199 possible_actions.append(action)200 action = [[0,1], True]201 possible_actions.append(action)202 elif location[1] == 11:203 #Can move only left204 action = [[0,-1], True]205 possible_actions.append(action)206 action = [[0,1], False]207 possible_actions.append(action)208 return possible_actions209 def make_move(self):210 """211 Gather all possible moves, then chooses either the move with maximum predicted reward or a random move.212 """213 #Gather all action values214 possible_actions = self.get_possible_actions()215 state = np.array([[self.location[0], self.location[1]]])216 potential_rewards = 
self.query(state)217 for index, reward in np.ndenumerate(potential_rewards):218 #Iterates through potential rewards219 #Reward = the prediction if possible; 0 if not220 array_index = index[1]221 if possible_actions[array_index][1] == True:222 possible_actions[array_index].append(reward)223 else:224 possible_actions[array_index].append(0)225 choose_optimal = self.rand.random()226 move = None227 if choose_optimal > self.epsilon:228 #Choose action with max predicted value229 for x in range(len(possible_actions)):230 if possible_actions[x][1] == True:231 if move == None:232 move = possible_actions[x]233 elif possible_actions[x][2] > move[2]:234 move = possible_actions[x]235 else:236 #Choose a random action237 random_move = self.rand.randrange(0,len(possible_actions))238 move = possible_actions[random_move]239 while move[1] == False: #Ensures move is possible240 random_move = self.rand.randrange(0,len(possible_actions))241 move = possible_actions[random_move]242 self.steps_taken = self.steps_taken + 1243 return move244 def query(self, state, current_net=True):245 """246 Returns a predicted value for a state-action pair.247 If current_net is true then the current_net is used for this prediction.248 If current_net is false then the target_net is used for this prediction.249 """250 if(current_net == True):251 value_prediction = self.current_net.predict(state, batch_size=1)252 else:253 value_prediction = self.target_net.predict(state, batch_size=1)254 return value_prediction255 def update_approximater(self):256 """257 Replays N memories.258 Updates the current_net based on a target which is:259 reward from the state (if terminal state)260 Max predicted reward from next state (if non-terminal state)261 Gradient descent is then performed on the current_net262 """263 if len(self.event_memory) < self.sample_size:264 memory_samples = random.sample(self.event_memory, len(self.event_memory))265 else:266 memory_samples = random.sample(self.event_memory, self.sample_size)267 268 269 for memory in memory_samples:270 previous_state = memory[0]271 action = memory[1]272 reward = memory[2]273 next_state = memory[3]274 275 state = np.array([[previous_state[0], previous_state[1]]])276 277 if memory[4] == True:278 target = np.array([reward])279 else:280 #Calculate max potential value from next state281 next_possible_actions = self.get_possible_actions([next_state[0], next_state[1]])282 next_state = np.array([[next_state[0], next_state[1]]])283 next_possible_rewards = (self.query(next_state, False))284 max_value_move = None285 for x in range(len(next_possible_actions)):286 if next_possible_actions[x][1] == True:287 if max_value_move == None:288 max_value_move = x289 if next_possible_rewards[0,x] > next_possible_rewards[0, max_value_move]:290 max_value_move = x291 292 target = np.array([reward + (self.discount * next_possible_rewards[0, max_value_move])])293 #Update original prediction to become the "target"294 original_prediction = self.target_net.predict(np.array([[previous_state[0], previous_state[1]]]))295 target_array = []296 for x in range (0,4):297 if action == x:298 target_array.append(target[0])299 else:300 target_array.append(original_prediction[0,x])301 302 #Convert target array to numpy array and train current net on the target.303 net_target = np.array([[target_array[0], target_array[1], target_array[2], target_array[3]]])304 self.current_net.fit(state, net_target,verbose=0)305 return False306 def reset_approximaters(self):307 """308 Sets the target_net to the current_net every fixed amount of steps309 
"""310 if self.steps_taken % self.reset_steps == 0:311 self.target_net = deepcopy(self.current_net)312 def update_score(self, score):313 """314 Updates the agent score based on the reward received.315 """316 self.score = self.score + score317 def set_agent_location(self, location):318 """319 Moves the agent to a located determined by input parameters.320 """321 self.location[0] = location[0]322 self.location[1] = location[1]323 def finish_episode(self):324 """325 Performs the actions related to the ending of an episode.326 """327 self.decay_epsilon()328 return False329 def remember_state_action(self, previous_state, action, reward, next_state, terminal):330 """331 Adds a state-action-reward-next_state-terminal_state array to memory332 This can then be replayed in event recall.333 """334 memory = [previous_state, action, reward, next_state, terminal]335 self.event_memory.append(memory)336 if len(self.event_memory) > self.memory_size:337 self.event_memory.pop(0)338 return False339 def decay_epsilon(self):340 """341 Decays the epsilon by a fixed amount defined during construction, if epsilon is above epsilon minimum342 """343 if self.epsilon > self.epsilon_minimum:344 self.epsilon = self.epsilon * (1 - self.epsilon_decay)345 if self.epsilon < self.epsilon_minimum:346 self.epsilon = self.epsilon_minimum347q_approx_grid = Grid(5)348dqn = q_approx(q_approx_grid.location, 1, 0.99, epsilon_decay=0.01,memory_size=1000, sample_size=32, reset_steps = 500, minimum_score=-250)349q_approx_grid.set_agent(dqn)350print(str(q_approx_grid) + "\n\n\n\n\n\n\n\n\n\n\n")351while(q_approx_grid.current_episode < q_approx_grid.episodes):352 q_approx_grid.make_move()353#Saves results to .csv file.354try:355 filename = (str("results/results_") + str(datetime.datetime.now()) + str(".csv"))356 file = open(filename, "w+", newline="\n")357 for x in range(0, len(dqn.scores)):358 file.write(str(x+1) + "," + str(dqn.scores[x]) + "\n")359 file.close()360except:...


evaluate_wheeled_pybullet_windows.py

Source: evaluate_wheeled_pybullet_windows.py (GitHub)


import os
from envs.WheeledRobotPybulletEnv import WheeledRobotPybulletEnv
from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

raw_env = WheeledRobotPybulletEnv(decision_interval=1, use_GUI=True,num_episode_steps=5)
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
vec_env = DummyVecEnv([lambda: raw_env])
dir_name = "results\LearningResults\PPO_WheeledRobotPybullet"
tensorboard_dir = dir_name + "\\tensorboard"
model_dir = dir_name + "\\model"
model = PPO2.load(model_dir, vec_env)
# model.learn(total_timesteps=100, tb_log_name="test")
# model.save(model_dir)
env = vec_env.envs[0]
obs_prev = env.reset()
x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
# robot_params = []
# Calculate number of time steps based on decsion interval to have 30sec rollout
# decision interval = dt , num_steps = n, rollout_time = t = 30sec, dt*n = t --> n = t/dt
t = 100 #sec
n = int(t/env.snake_robot.decision_interval)
n = (env.num_episode_steps)*3
for i in range(n):
    x_prev = env.snake_robot.x
    action, _states = model.predict(obs_prev)
    obs, rewards, dones, info = env.step(action)
    x = env.snake_robot.x
    print(
        "Timestep: {} | State: {} | Action: {} | Reward: {} | dX: {}".format(i, obs_prev, action, rewards, x - x_prev))
    obs_prev = obs
    x_poss.append(env.snake_robot.x)
    y_poss.append(env.snake_robot.y)
    thetas.append(env.snake_robot.theta)
    times.append(i)
    a1s.append(env.snake_robot.a1)
    a2s.append(env.snake_robot.a2)
    a1dots.append(env.snake_robot.a1dot)
    a2dots.append(env.snake_robot.a2dot)
plots_dir = dir_name + "\\PolicyRolloutPlotsFromLoading\\"
if not os.path.isdir(plots_dir):
    os.mkdir(plots_dir)
#----- Seperate data into 3 trials and plot results "double check reset theta and indecies used for plot" -------------#
import matplotlib
import numpy as np
matplotlib.rcParams['font.family'] = 'serif'
matplotlib.rcParams['font.size'] = 12
policy_fig = plt.figure(figsize=(10, 10))
plt.subplot(1, 2, 1)
plt.title('Position-Orientation vs Time')
colors = ['red','green','blue']; maps = ['Reds','Greens','Blues']
reset_steps = env.num_episode_steps
for i in range(3):
    index = reset_steps*i
    Time = np.arange(0,len(x_poss[reset_steps*i+1:reset_steps*(i+1)]),1) * env.snake_robot.decision_interval
    plt.plot(x_poss[reset_steps*i+1:reset_steps*(i+1)], y_poss[reset_steps*i+1:reset_steps*(i+1)], color=colors[i], linestyle='dashed', marker="o", alpha=0.2, label='trial'+str(i)) #+'theta ='+str(thetas[index]) )
    plt.quiver(x_poss[reset_steps*i+1:reset_steps*(i+1)], y_poss[reset_steps*i+1:reset_steps*(i+1)], np.cos( thetas[reset_steps*i+1:reset_steps*(i+1)] ), np.sin( thetas[reset_steps*i+1:reset_steps*(i+1)] ), Time, edgecolors='k', units='xy', cmap=maps[i])
plt.legend() #[r'$ \hat P_{(x_i,y_i,\theta_i)} $'])
plt.xlabel('X position (meters)'); plt.ylabel('Y position (meters)')
plt.subplot(1, 2, 2)
plt.title('Joint Angle Space vs Time')
for i in range(3):
    A1s = a1s[reset_steps*i+1:reset_steps*(i+1)]
    A2s = a2s[reset_steps*i+1:reset_steps*(i+1)]
    T = np.arange(0,len(a1s[reset_steps*i+1:reset_steps*(i+1)]),1) * env.snake_robot.decision_interval
    plt.plot(A1s, A2s, alpha=(i+1)/3,color= colors[i],label='trial'+str(i))
    plt.scatter(A1s, A2s, c=T, marker='d',cmap = maps[i])
plt.legend()
plt.xlabel(r'$\alpha_1$')
plt.ylabel(r'$\alpha_2$')
plt.tight_layout()
plt.savefig(plots_dir + 'PolicyRolloutPlot' + '.png')
plt.close()
#-------------------------------- End of Policy Rollout Plot ---------------------------------#
# view results
# print('x positions are: ' + str(x_pos))
# print('y positions are: ' + str(y_pos))
# print('thetas are: ' + str(thetas))
plot_style = "--bo"
marker_size = 3
plt.plot(x_poss, y_poss, plot_style, markersize=marker_size)
plt.xlabel('x')
plt.ylabel('y')
plt.savefig(plots_dir + 'y vs x' + '.png')
plt.close()
plt.plot(times, a1s, plot_style, markersize=marker_size)
plt.ylabel('a1 displacements')
plt.xlabel('time')
plt.savefig(plots_dir + 'a1 displacements' + '.png')
plt.close()
plt.plot(times, a2s, plot_style, markersize=marker_size)
plt.ylabel('a2 displacements')
plt.xlabel('time')
plt.savefig(plots_dir + 'a2 displacements' + '.png')
plt.close()
plt.plot(times, x_poss, plot_style, markersize=marker_size)
plt.ylabel('x positions')
plt.xlabel('time')
plt.savefig(plots_dir + 'x positions' + '.png')
plt.close()
plt.plot(times, y_poss, plot_style, markersize=marker_size)
plt.ylabel('y positions')
plt.xlabel('time')
plt.savefig(plots_dir + 'y positions' + '.png')
plt.close()
plt.plot(times, thetas, plot_style, markersize=marker_size)
plt.ylabel('thetas')
plt.xlabel('time')
plt.savefig(plots_dir + 'thetas' + '.png')
plt.close()
plt.plot(times, a1dots, plot_style, markersize=marker_size)
plt.ylabel('a1dot')
plt.xlabel('time')
plt.savefig(plots_dir + 'a1dot' + '.png')
plt.close()
plt.plot(times, a2dots, plot_style, markersize=marker_size)
plt.ylabel('a2dot')
plt.xlabel('time')
plt.savefig(plots_dir + 'a2dot' + '.png')
plt.close()
""""""
# Comprehensive Plot of Policy Rollout Data
# - Joint angles vs Time
# - Joint States vs Time
# - Actions vs Time
# - Position/Orientation vs Time
# convert time scale based on decsion interval
TIME = []
for t_steps in times:
    TIME.append(env.snake_robot.decision_interval*t_steps)
times = TIME
import matplotlib
matplotlib.rcParams['font.family'] = 'serif'
matplotlib.rcParams['font.size'] = 12
matplotlib.rcParams['image.cmap'] = 'gray'
import math
policy_fig = plt.figure(figsize=(10, 10))
plt.subplot(2, 2, 1)
plt.title('Joint Angles vs Time')
plt.plot(times,a1s,marker='d',alpha = 0.5,color='red')
plt.plot(times,a2s,marker='p',alpha = 0.5,color='blue')
plt.xlabel('time (sec)')
plt.ylabel(r'$\alpha $'+ ' ' + '(radians)')
plt.legend([r'$\alpha_1$',r'$\alpha_2$'])
plt.subplot(2, 2, 2)
plt.title('Joint States vs Time')
plt.plot(a1s,a2s, 'k--', alpha = 0.1)
plt.scatter(a1s,a2s, c=times, marker='d')
cbar = plt.colorbar(); cbar.set_label('time (sec)')
plt.xlabel(r'$\alpha_1$')
plt.ylabel(r'$\alpha_2$')
plt.subplot(2, 2, 3)
plt.title('Actions vs Time')
plt.plot(times,a1dots,marker='d',alpha = 0.5,color='red')
plt.plot(times,a2dots,marker='p',alpha = 0.5,color='blue')
plt.xlabel('time (sec)')
plt.ylabel(r'$\.\alpha $'+ ' ' + '(radians/sec)')
plt.legend([r'$\.\alpha_1$',r'$\.\alpha_2$'])
plt.subplot(2, 2, 4)
plt.title('Position-Orientation vs Time')
sys_info = ("Evaluation Time: {} | Decision Interval: {} | X Displacment: {} | Gait Speed: {} |".format(t, env.snake_robot.decision_interval, round(x_poss[-1]-x_poss[0],2), round( ((x_poss[-1]-x_poss[0])/t),2) ))
plt.suptitle(sys_info)
xv,yv,Lx,Ly = [],[],[],[]
for i in range(len(thetas)):
    xv.append(math.cos(thetas[i])); yv.append(math.sin(thetas[i]))
    Lx.append(0); Ly.append(1)
plt.scatter(x_poss, y_poss, c=times)
B = plt.quiver(x_poss,y_poss,xv,yv,times,edgecolors='k',units='xy')
cbar = plt.colorbar(); cbar.set_label('time (sec)')
plt.plot(x_poss, y_poss, color='black', linestyle='dashed', marker="o", alpha=0.2)
plt.xlabel('X position (meters)'); plt.ylabel('Y position (meters)')
plt.legend([r'$ \hat P_{(x_i,y_i,\theta_i)} $'])
# heading "phi" arrow with colormap
# body frame arrows --> plt.quiver(x_poss, y_poss, Lx, Ly, angles='xy') & plt.quiver(x_poss, y_poss, xv, yv, times, edgecolors='k', units='xy')
# legend compass, indicates heading direction
X_key = -0.125
Y_key = -0.075
Arrow_length = 1.15
#B = plt.quiver(x_poss, y_poss, Ly, Lx, angles='xy')
plt.quiverkey(B, X_key, Y_key, Arrow_length, r'$\theta= \pi/2$', angle=90, labelpos='N') #,coordinates='figure')
plt.quiverkey(B, X_key+.0675, Y_key-0.025, Arrow_length, r'$\theta=0$', angle=0,labelpos='E') #,coordinates='figure')
plt.tight_layout()
#plt.savefig(plots_dir + 'PolicyRolloutPlot' + '.png')...
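
In this script, reset_steps is simply the episode length (env.num_episode_steps); the rollout arrays are carved into three per-trial segments with slices of the form reset_steps*i+1 : reset_steps*(i+1) before plotting. A standalone sketch of that slicing with dummy data (the array contents here are made up for illustration):

import numpy as np

# Assumption for illustration: 5 steps per episode, 3 episodes rolled out
# back to back, mirroring n = env.num_episode_steps * 3 in the snippet.
reset_steps = 5
x_poss = np.arange(1 + reset_steps * 3)  # index 0 is the pre-rollout state

for i in range(3):
    # Per-trial segment, indexed exactly as in the snippet above.
    trial = x_poss[reset_steps * i + 1 : reset_steps * (i + 1)]
    print(f"trial {i}: {trial}")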


exp_igt_optimizer_test.py

Source: exp_igt_optimizer_test.py (GitHub)


# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python2, python3
"""Tests for igt_optimizer."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf

from igt_optimizer import exp_igt_optimizer
# pylint:disable=g-direct-tensorflow-import
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.platform import test
# pylint:enable=g-direct-tensorflow-import

LEARNING_RATE = 2.0

class IgtValidator(object):
  """A reference python implementation of the IGT optimizer."""

  def __init__(self,
               w_init,
               learning_rate,
               reset_steps=None,
               reset_shift=False):
    self.w = w_init
    self.learning_rate = learning_rate
    if reset_steps is None:
      reset_steps = []
    self.reset_steps = reset_steps
    self.reset_shift = reset_shift
    self.step = 0
    self.v = np.zeros(self.w.shape)
    self.w_hat = self.w.copy()

  def update(self, grad):
    momentum = self.step / (self.step + 1.)
    self.v = momentum * self.v + (1. - momentum) * grad
    update = -self.learning_rate * self.v
    self.w += update
    self.step += 1
    momentum_next = self.step / (self.step + 1.)
    self.w_hat = self.w + momentum_next / (1. - momentum_next) * update
    if self.step in self.reset_steps:
      if self.reset_shift:
        self.w = self.w_hat
      else:
        self.w_hat = self.w
      self.step = 0

class ExpIgtOptimizerTest(test.TestCase):

  def doTestApplyGradients(self, use_resource=False):
    """Validate the IGT update (i.e. apply_gradients) against a python impl."""
    # TODO(manzagop): try dtypes.half and dtypes.float64:
    for dtype in [dtypes.float32]:
      print('running for dtype {}'.format(dtype))
      with self.test_session():
        # Set up 2 variables and constants for their gradients.
        var0_value = np.array([1.0, 2.0])
        var1_value = np.array([3.0, 4.0])
        if use_resource:
          var0 = resource_variable_ops.ResourceVariable(var0_value, dtype=dtype)
          var1 = resource_variable_ops.ResourceVariable(var1_value, dtype=dtype)
        else:
          var0 = tf_variables.Variable(var0_value, dtype=dtype)
          var1 = tf_variables.Variable(var1_value, dtype=dtype)
        grads0 = tf.placeholder(dtype, shape=var0.get_shape())
        grads1 = tf.placeholder(dtype, shape=var1.get_shape())

        # TODO(manzagop): use a different tail fraction once validator support.
        igt_opt = exp_igt_optimizer.ExpIgtOptimizer(
            learning_rate=LEARNING_RATE, tail_fraction=1.)
        igt_update = igt_opt.apply_gradients(
            list(zip([grads0, grads1], [var0, var1])),
            global_step=tf.train.get_global_step())
        tf_variables.global_variables_initializer().run()

        # Validate we have slots.
        expected_slot_names = set(['estimate', 'true_param', 'update'])
        self.assertEqual(expected_slot_names, set(igt_opt.get_slot_names()))
        for slot_name in expected_slot_names:
          for var in [var0, var1]:
            slot = igt_opt.get_slot(var, slot_name)
            self.assertEqual(slot.get_shape(), var.get_shape())
            self.assertNotIn(slot, tf_variables.trainable_variables())

        # Validate initial values.
        validators = [
            IgtValidator(var0_value, LEARNING_RATE),
            IgtValidator(var1_value, LEARNING_RATE)
        ]
        self._validate(igt_opt, [var0, var1], validators)

        # Run first update and validate.
        g0_first = np.array([0.1, 0.1])
        g1_first = np.array([0.01, 0.01])
        igt_update.run({grads0: g0_first, grads1: g1_first})
        validators[0].update(g0_first)
        validators[1].update(g1_first)
        self._validate(igt_opt, [var0, var1], validators)

        # Run second update and validate.
        g0_second = np.array([0.1, 0.1])
        g1_second = np.array([0.01, 0.01])
        igt_update.run({grads0: g0_second, grads1: g1_second})
        validators[0].update(g0_second)
        validators[1].update(g1_second)
        self._validate(igt_opt, [var0, var1], validators)

  def _validate(self, opt, variables, validators):
    for var, validator in zip(variables, validators):
      slot = opt.get_slot(var, 'estimate')
      self.assertAllCloseAccordingToType(validator.v, slot.eval())
      slot = opt.get_slot(var, 'true_param')
      self.assertAllCloseAccordingToType(validator.w, slot.eval())
      self.assertAllCloseAccordingToType(validator.w_hat, var.eval())

  def testApplyGradients(self):
    self.doTestApplyGradients(use_resource=False)

  def testResourceApplyGradients(self):
    self.doTestApplyGradients(use_resource=True)

  def testMinimize(self):
    """Ensure that minimize actually lowers the loss."""
    with self.test_session():
      w_init = np.random.randn(10)
      w = tf.Variable(w_init, dtype=dtypes.float32)
      loss = tf.reduce_sum(w * w)
      igt_opt = exp_igt_optimizer.ExpIgtOptimizer(
          learning_rate=0.01, tail_fraction=2.)
      igt_update = igt_opt.minimize(loss)
      tf_variables.global_variables_initializer().run()
      loss_pre = loss.eval()
      igt_update.run()
      loss_post = loss.eval()
      self.assertLess(loss_post, loss_pre)

  def testSwap(self):
    with self.cached_session() as sess:
      v_init = np.random.randn(10)
      v = tf.Variable(v_init, dtype=dtypes.float32)
      loss = tf.reduce_sum(v * v)
      opt = exp_igt_optimizer.ExpIgtOptimizer(
          learning_rate=0.01, tail_fraction=2.)
      unused_igt_update = opt.minimize(loss)
      slot = opt.get_slot(v, 'true_param')
      tf_variables.global_variables_initializer().run()
      self.assertAllCloseAccordingToType(v_init, v.eval())
      self.assertAllCloseAccordingToType(v_init, slot.eval())
      zeros = np.zeros(10)
      sess.run(v.assign(zeros))
      self.assertAllCloseAccordingToType(zeros, v.eval())
      self.assertAllCloseAccordingToType(v_init, slot.eval())
      swap_op = opt.swap_true_and_shifted()
      swap_op.run()
      self.assertAllCloseAccordingToType(v_init, v.eval())
      self.assertAllCloseAccordingToType(zeros, slot.eval())

if __name__ == '__main__':...
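
Here reset_steps is an optional list of step counts: whenever IgtValidator.update reaches a step contained in the list, the validator either promotes the shifted parameters or collapses them back onto the true ones, and then restarts its step counter. A short usage sketch, assuming the IgtValidator class defined in the snippet above is in scope (the gradient values are arbitrary):

import numpy as np

# reset_steps=[3]: after the third update the shifted parameters (w_hat)
# are pulled back onto the true parameters (w) and the step counter
# restarts from zero; reset_shift=True would instead promote w_hat to w.
validator = IgtValidator(
    w_init=np.array([1.0, 2.0]),
    learning_rate=2.0,
    reset_steps=[3],
    reset_shift=False,
)
for _ in range(5):
    validator.update(np.array([0.1, 0.1]))
    print(validator.step, validator.w, validator.w_hat)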


torch_model_saver.py

Source: torch_model_saver.py (GitHub)


import os
import shutil
from mlagents.torch_utils import torch
from typing import Dict, Union, Optional, cast, Tuple, List
from mlagents_envs.exception import UnityPolicyException
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.model_saver.model_saver import BaseModelSaver
from mlagents.trainers.settings import TrainerSettings, SerializationSettings
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.torch.model_serialization import ModelSerializer

logger = get_logger(__name__)
DEFAULT_CHECKPOINT_NAME = "checkpoint.pt"

class TorchModelSaver(BaseModelSaver):
    """
    ModelSaver class for PyTorch
    """
    def __init__(
        self, trainer_settings: TrainerSettings, model_path: str, load: bool = False
    ):
        super().__init__()
        self.model_path = model_path
        self.initialize_path = trainer_settings.init_path
        self._keep_checkpoints = trainer_settings.keep_checkpoints
        self.load = load
        self.policy: Optional[TorchPolicy] = None
        self.exporter: Optional[ModelSerializer] = None
        self.modules: Dict[str, torch.nn.Modules] = {}

    def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None:
        if isinstance(module, TorchPolicy) or isinstance(module, TorchOptimizer):
            self.modules.update(module.get_modules())  # type: ignore
        else:
            raise UnityPolicyException(
                "Registering Object of unsupported type {} to ModelSaver ".format(
                    type(module)
                )
            )
        if self.policy is None and isinstance(module, TorchPolicy):
            self.policy = module
            self.exporter = ModelSerializer(self.policy)

    def save_checkpoint(self, behavior_name: str, step: int) -> Tuple[str, List[str]]:
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
        state_dict = {
            name: module.state_dict() for name, module in self.modules.items()
        }
        pytorch_ckpt_path = f"{checkpoint_path}.pt"
        export_ckpt_path = f"{checkpoint_path}.onnx"
        torch.save(state_dict, f"{checkpoint_path}.pt")
        torch.save(state_dict, os.path.join(self.model_path, DEFAULT_CHECKPOINT_NAME))
        self.export(checkpoint_path, behavior_name)
        return export_ckpt_path, [pytorch_ckpt_path]

    def export(self, output_filepath: str, behavior_name: str) -> None:
        if self.exporter is not None:
            self.exporter.export_policy_model(output_filepath)

    def initialize_or_load(self, policy: Optional[TorchPolicy] = None) -> None:
        # Initialize/Load registered self.policy by default.
        # If given input argument policy, use the input policy instead.
        # This argument is mainly for initialization of the ghost trainer's fixed policy.
        reset_steps = not self.load
        if self.initialize_path is not None:
            logger.info(f"Initializing from {self.initialize_path}.")
            self._load_model(
                self.initialize_path, policy, reset_global_steps=reset_steps
            )
        elif self.load:
            logger.info(f"Resuming from {self.model_path}.")
            self._load_model(
                os.path.join(self.model_path, DEFAULT_CHECKPOINT_NAME),
                policy,
                reset_global_steps=reset_steps,
            )

    def _load_model(
        self,
        load_path: str,
        policy: Optional[TorchPolicy] = None,
        reset_global_steps: bool = False,
    ) -> None:
        saved_state_dict = torch.load(load_path)
        if policy is None:
            modules = self.modules
            policy = self.policy
        else:
            modules = policy.get_modules()
        policy = cast(TorchPolicy, policy)
        for name, mod in modules.items():
            try:
                if isinstance(mod, torch.nn.Module):
                    missing_keys, unexpected_keys = mod.load_state_dict(
                        saved_state_dict[name], strict=False
                    )
                    if missing_keys:
                        logger.warning(
                            f"Did not find these keys {missing_keys} in checkpoint. Initializing."
                        )
                    if unexpected_keys:
                        logger.warning(
                            f"Did not expect these keys {unexpected_keys} in checkpoint. Ignoring."
                        )
                else:
                    # If module is not an nn.Module, try to load as one piece
                    mod.load_state_dict(saved_state_dict[name])
            # KeyError is raised if the module was not present in the last run but is being
            # accessed in the saved_state_dict.
            # ValueError is raised by the optimizer's load_state_dict if the parameters have
            # have changed. Note, the optimizer uses a completely different load_state_dict
            # function because it is not an nn.Module.
            # RuntimeError is raised by PyTorch if there is a size mismatch between modules
            # of the same name. This will still partially assign values to those layers that
            # have not changed shape.
            except (KeyError, ValueError, RuntimeError) as err:
                logger.warning(f"Failed to load for module {name}. Initializing")
                logger.debug(f"Module loading error : {err}")
        if reset_global_steps:
            policy.set_step(0)
            logger.info(
                "Starting training from step 0 and saving to {}.".format(
                    self.model_path
                )
            )
        else:
            logger.info(f"Resuming training from step {policy.get_current_step()}.")

    def copy_final_model(self, source_nn_path: str) -> None:
        """
        Copy the .nn file at the given source to the destination.
        Also copies the corresponding .onnx file if it exists.
        """
        final_model_name = os.path.splitext(source_nn_path)[0]
        if SerializationSettings.convert_to_onnx:
            try:
                source_path = f"{final_model_name}.onnx"
                destination_path = f"{self.model_path}.onnx"
                shutil.copyfile(source_path, destination_path)
                logger.info(f"Copied {source_path} to {destination_path}.")
            except OSError:...
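
In TorchModelSaver, reset_steps is a boolean rather than a counter: initialize_or_load sets reset_steps = not self.load and forwards it to _load_model as reset_global_steps, so the policy's global step is zeroed when a run is initialized from another checkpoint instead of resumed. A condensed sketch of that decision, using a hypothetical PolicyStub in place of the ML-Agents TorchPolicy:

class PolicyStub:
    """Hypothetical stand-in for TorchPolicy: only tracks a global step."""
    def __init__(self, step=1234):
        self._step = step
    def set_step(self, step):
        self._step = step
    def get_current_step(self):
        return self._step

def initialize_or_load(policy, load):
    # Mirrors the snippet: reset the global step unless we are resuming.
    reset_steps = not load
    if reset_steps:
        policy.set_step(0)
        print("Starting training from step 0.")
    else:
        print(f"Resuming training from step {policy.get_current_step()}.")

initialize_or_load(PolicyStub(), load=False)  # fresh init -> step reset to 0
initialize_or_load(PolicyStub(), load=True)   # resume -> step kept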


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run Behave automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation testing FREE!!

