How to use state_iterator method in autotest

import pandas as pd
import enum

from tensorforce import Environment
from tensorforce.agents import Agent
from fe_module import features_file_path, flash_crash_event_starting_timestamp, flash_crash_event_ending_timestamp


class Decision(enum.Enum):
    """Values the agent's ``DECISION`` action is compared against."""

    NORMAL = 0
    ABNORMAL = 1


class ExchangeEnvironment(Environment):
    """Tensorforce environment that steps row by row through a DataFrame.

    The observation frame is indexed by timestamp. Its ``REWARD`` column
    supplies the per-step reward magnitude, and every column except the
    last two is exposed to the agent as the state.
    """

    def __init__(self, environment_observations):
        """Store the observations and rewind to the first row.

        Args:
            environment_observations: DataFrame with a ``REWARD`` column;
                its last two columns are dropped from the state.
        """
        super().__init__()
        self.state_iterator = 0
        self.reward_var = environment_observations['REWARD']
        self.environment_observations = environment_observations.iloc[:, :-2]
        self.no_states = self.environment_observations.shape[0]
        self.current_state = {}

    def states(self):
        """Return the state specification (a float vector of length 7)."""
        # NOTE(review): the length is hard-coded; presumably it equals the
        # number of columns kept by ``iloc[:, :-2]`` -- confirm.
        state_dict = dict(type='float', shape=(7,))
        return state_dict

    def actions(self):
        """Return the action specification: one binary ``DECISION``."""
        return {"DECISION": dict(type="int", num_values=2)}

    def close(self):
        """Delegate to the base class ``close``."""
        super().close()

    def reset(self):
        """Rewind to the first observation row and return it."""
        self.state_iterator = 0
        self.current_state = self.environment_observations.iloc[self.state_iterator]
        return self.current_state

    def max_episode_timesteps(self):
        """Return the number of observation rows."""
        return self.no_states

    def check_terminal_criteria(self, actions, all_decisions, timestamp, early_terminate=False):
        """Decide whether the episode ends at the current step.

        Args:
            actions: action dict holding the current ``DECISION``.
            all_decisions: decisions taken earlier in the episode.
            timestamp: timestamp of the step being evaluated.
            early_terminate: when true, also end the episode on a
                ``DECISION`` of 1 before the flash-crash start, or when
                no decision of 1 has been made by the cutoff timestamp.

        Returns:
            True if the episode should terminate, otherwise False.
        """
        terminate = False
        cutoff_timestamp = '2022-05-02 07:58:20.119000+00:00'

        if early_terminate:
            moment = pd.to_datetime(timestamp)
            if moment < pd.to_datetime(flash_crash_event_starting_timestamp) and \
                    actions['DECISION'] == 1:
                terminate = True
            elif moment > pd.to_datetime(cutoff_timestamp) and \
                    sum(all_decisions) == 0:
                terminate = True
        # The episode always ends once the iterator reaches the last row.
        if self.state_iterator == self.no_states - 1:
            terminate = True
        return terminate

    def calculate_reward(self, reward_val, timestamp, decision, terminal):
        """Compute the reward for one step.

        Args:
            reward_val: reward magnitude read from the ``REWARD`` column.
            timestamp: timestamp of the step being rewarded.
            decision: the agent's ``DECISION`` value for the step.
            terminal: whether the step ended the episode.

        Returns:
            -200 when the episode terminates before the flash-crash end;
            otherwise a value that depends on the decision and on whether
            the timestamp is before, during or after the flash crash.
        """
        moment = pd.to_datetime(timestamp)
        crash_start = pd.to_datetime(flash_crash_event_starting_timestamp)
        crash_end = pd.to_datetime(flash_crash_event_ending_timestamp)

        if terminal is True and moment < crash_end:
            reward = -200
        else:
            if moment < crash_start:
                if decision == Decision.NORMAL.value:
                    reward = reward_val
                else:
                    reward = 0

            elif crash_start <= moment <= crash_end:
                if decision == Decision.NORMAL.value:
                    reward = -reward_val
                else:
                    reward = 0

            elif moment > crash_end:
                if decision == Decision.ABNORMAL.value:
                    reward = -3*reward_val
                else:
                    reward = 0

        return reward

    def report_accuracy(self, all_decisions, y):
        """Print and return per-segment decision accuracy in percent.

        ``y`` is split into the leading run before its first 1
        ("before"), the run from that first 1 up to the next 0
        ("during"), and everything from there on ("after").

        Args:
            all_decisions: decisions taken during the episode.
            y: list of 0/1 labels aligned with ``all_decisions``.

        Returns:
            Dict with ``before``, ``during``, ``after`` and ``overall``
            accuracies.

        Raises:
            ValueError: if ``y`` has no 1, or no 0 after its first 1.
            ZeroDivisionError: if the "before" segment or the trailing
                run of zeros is empty.
        """
        crash_start = y.index(1)
        crash_end = y.index(0, crash_start)
        n_before = crash_start
        n_during = crash_end - crash_start
        # Length of the trailing run of zeros after the last 1.
        n_after = y[::-1].index(1)

        accuracy = {}
        true_before = 0
        true_during = 0
        true_after = 0

        for i, (decision, label) in enumerate(zip(all_decisions, y)):
            if decision != label:
                continue
            # Half-open segments: the sample at ``crash_start`` belongs
            # to "during", not to "before".
            if i < crash_start:
                true_before += 1
            elif i < crash_end:
                true_during += 1
            else:
                true_after += 1

        accuracy['before'] = true_before / n_before*100
        accuracy['during'] = true_during / n_during*100
        accuracy['after'] = true_after / n_after*100
        accuracy['overall'] = (true_after+true_before+true_during) / len(y)*100

        print('Accuracy before the crash: ', accuracy['before'])
        print('Accuracy during crash: ', accuracy['during'])
        print('Accuracy after the crash:', accuracy['after'])
        print('Overall Accuracy: ', accuracy['overall'])

        return accuracy

    def execute(self, all_decisions, actions):
        """Advance one row and score the action taken on the previous row.

        Args:
            all_decisions: decisions taken earlier in the episode.
            actions: action dict holding the current ``DECISION``.

        Returns:
            Tuple ``(next_state, terminal, reward)``.
        """
        # The timestamp is read before advancing; the reward magnitude
        # and the next state are read after advancing.
        timestamp = self.environment_observations.index[self.state_iterator]
        self.state_iterator += 1
        reward_val = self.reward_var.iloc[self.state_iterator]
        next_state = self.environment_observations.iloc[self.state_iterator]
        terminal = self.check_terminal_criteria(
            actions, all_decisions, timestamp, early_terminate=False)
        reward = self.calculate_reward(reward_val,
                                       timestamp, actions['DECISION'], terminal)

        return next_state, terminal, reward


class ExchangeAgent:
    """Wrapper that creates and holds a DQN agent for the environment."""

    def __init__(self, environment: ExchangeEnvironment):
        """Create the DQN agent.

        Reads the module-level label list ``y`` (assigned in the
        ``__main__`` section) to size the agent's memory.
        """
        self.market_evaluation = Agent.create(
            agent='dqn', environment=environment, batch_size=128, memory=len(y))
        self.episode_actions = {}


def execute_episode_batch(environment: ExchangeEnvironment, agent: ExchangeAgent, episodes):
    """Run up to ``episodes`` training episodes and report accuracy.

    Decisions and rewards of each full-length episode are added as
    columns to one DataFrame, which is written to
    ``Decision_reward.xlsx`` when the accuracy thresholds are met.
    Reads the module-level label list ``y``.
    """
    decision_reward_df = pd.DataFrame()
    for i in range(episodes):
        episode_length = 0
        state_vector = environment.reset()
        all_timestamps = []
        all_decisions = []
        all_rewards = []
        terminate = False
        while not terminate:
            episode_length += 1
            actions = agent.market_evaluation.act(states=state_vector)
            state_vector, terminate, reward = environment.execute(all_decisions,
                                                                  actions=actions)
            agent.market_evaluation.observe(
                terminal=terminate, reward=reward)
            all_decisions.append(actions['DECISION'])
            all_rewards.append(reward)
            # NOTE(review): the argument of this append call is missing
            # from the source listing. It is reconstructed here as the
            # timestamp ``execute`` used for this step (the index entry
            # just before the advanced position) -- confirm against the
            # original script.
            all_timestamps.append(
                environment.environment_observations.index[environment.state_iterator - 1])

        # NOTE(review): the listing lost its indentation; this check is
        # assumed to run once per episode, after the step loop, so that
        # ``break`` stops training -- confirm.
        if episode_length == len(y):
            decision_reward_df['Timestamp'] = all_timestamps
            decision_reward_df[f'model/Decision_Episode{i}'] = all_decisions
            decision_reward_df[f'Rewards_Episode{i}'] = all_rewards
            accuracy = environment.report_accuracy(all_decisions, y)
            if accuracy['overall'] > 90:
                decision_reward_df.to_excel('Decision_reward.xlsx')

            if accuracy['before'] == 100 and accuracy['during'] > 70 and accuracy['after'] == 100:
                decision_reward_df.to_excel('Decision_reward.xlsx')
                break


def simulator(environment: ExchangeEnvironment, agent: ExchangeAgent, episodes=100):
    """Run the episode batch, then close the environment."""
    execute_episode_batch(environment, agent, episodes)
    environment.close()


if __name__ == "__main__":

    environment_observations = pd.read_csv(
        features_file_path + 'df_environment.csv', index_col='Timestamp')
    exchange_environment = ExchangeEnvironment(
        environment_observations)
    # Labels are shifted by one row relative to the observations.
    y = environment_observations['LABEL'][1:].to_list()
    exchange_agent = ExchangeAgent(exchange_environment)
    # ... (the source listing is truncated here)

...13 self.job = job14 self.finished = False15 self.line_buffer = status_lib.line_buffer()16 # create and prime the parser state machine17 self.state = self.state_iterator(self.line_buffer)18 def process_lines(self, lines):20 """ Feed 'lines' into the parser state machine, and return21 a list of all the new test results produced."""22 self.line_buffer.put_multiple(lines)23 try:24 return except StopIteration:26 msg = ("WARNING: parser was called to process status "27 "lines after it was end()ed\n"28 "Current traceback:\n" +29 traceback.format_exc() +30 "\nCurrent stack:\n" +31 "".join(traceback.format_stack()))32 tko_utils.dprint(msg)33 return []34 def end(self, lines=[]):35 """ Feed 'lines' into the parser state machine, signal to the36 state machine that no more lines are forthcoming, and then37 return a list of all the new test results produced."""38 self.line_buffer.put_multiple(lines)39 # run the state machine to clear out the buffer40 self.finished = True41 try:42 return except StopIteration:44 msg = ("WARNING: parser was end()ed multiple times\n"45 "Current traceback:\n" +46 traceback.format_exc() +47 "\nCurrent stack:\n" +48 "".join(traceback.format_stack()))49 tko_utils.dprint(msg)50 return []51 @staticmethod52 def make_job(dir):53 """ Create a new instance of the job model used by the54 parser, given a results directory."""55 raise NotImplementedError56 def state_iterator(self, buffer):57 """ A generator method that implements the actual parser58 state machine. """...

