I have some Python code that I want to accelerate using Numba. It involves a class, tuples, and a dictionary. Can anyone help me find a way forward with this code?
@njit
def q_learn(EPSILON, episode_rewards, q_table):
    """Run tabular Q-learning for N_EPISODES episodes and return the moving
    average of per-episode rewards.

    Parameters
    ----------
    EPSILON : float
        Exploration rate; decayed by EPSILON_DECAY after every episode.
    episode_rewards : array-like of float
        Pre-allocated buffer, one slot per episode, filled in place.
    q_table : mapping from 3-tuple observation -> per-action Q-values

    Returns
    -------
    moving_avg : np.ndarray
        SHOW_EVERY-wide moving average of episode_rewards.

    NOTE(review): `open()`, `pickle.dump`, `time.time()` and f-string printing
    are not supported in Numba nopython mode — move the checkpointing out of
    this function (or wrap it in `numba.objmode`) before applying @njit.
    Dict/tuple-keyed q_table also needs a numba.typed.Dict to compile.
    """
    for episode in range(N_EPISODES + 1):
        environment = Environment(...)  # placeholder in question: "some integer data"
        print(f'Currently at {episode} Episode')
        if episode % SHOW_EVERY == 0:
            print(f'Episode {episode} epsilon:{EPSILON}')
            print(f'{SHOW_EVERY} episode mean reward {np.mean(episode_rewards[-SHOW_EVERY:])}')
            # NOTE(review): checkpoint assumed to belong inside this
            # SHOW_EVERY branch (indentation was lost in the paste) — confirm.
            # This I/O cannot run under @njit; hoist it out of the jitted code.
            with open(f"q-table_age{int(time.time())}.pickle", "wb") as f:
                pickle.dump(q_table, f)
        epi_rew = 0.0
        for i in range(100):
            rd = random.randint(0, 15)
            x = test_data[rd]
            y = test_data[rd]
            obs = (...)  # placeholder in question: "tuple of size 3"
            if np.random.random() > EPSILON:
                action = np.argmax(q_table[obs]) + 1   # exploit: best known action
            else:
                action = np.random.randint(1, 4)       # explore: random action in 1..3
            reward = environment.action(...)           # placeholder: 3 ints and 1 float
            environment.battery_evolution(...)         # placeholder: integer
            new_obs = (...)  # placeholder in question: "tuple of size 3"
            max_future_q = np.max(q_table[new_obs])
            # BUG FIX: the current Q-value must be read from the *current*
            # observation `obs`, not `new_obs` — the original mixed states,
            # which breaks the Bellman update and prevents convergence.
            current_q = q_table[obs][action - 1]
            new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)
            q_table[obs][action - 1] = new_q
            epi_rew += reward
        episode_rewards[episode] = epi_rew
        EPSILON *= EPSILON_DECAY
    # BUG FIX: np.convolve's mode must be lowercase "valid";
    # mode="Valid" raises ValueError at runtime.
    moving_avg = np.convolve(episode_rewards, np.ones((SHOW_EVERY,)) / SHOW_EVERY, mode="valid")
    return moving_avg