Aug-07-2020, 08:19 PM
I keep getting errors without any traceback.
"""Q-learning stock-trading toy: download AMZN closes, then simulate trading
with an epsilon-greedy policy backed by a small 2-layer TF1 network.

Bug fixes versus the original paste:
  * select_action() fell off the end without `return` -> always None,
    so every step degraded to 'Hold' and nothing was ever learned.
  * `if i % 100 == 0:` had only a commented-out print as its body ->
    IndentationError (empty suite) in the properly indented file.
  * `prices` was built via to_string()/split() -> list of *strings* fed
    into np.hstack state vectors and a float32 placeholder.
  * TF variables were never initialized; update_q() was never overridden,
    so train_op and gamma were dead code.
  * '${}t{}' format string: 't' was a mistyped '\\t'.
"""
import random

import numpy as np
import pandas as pd
import tensorflow as tf
# Alias the reader module so the DataFrame below does not shadow it.
from pandas_datareader import data as pdr

# Date range and ticker for the historical download.
start_date = '2017-01-01'
end_date = '2019-02-01'
ticker = 'AMZN'

# Fetch the data; keep closing prices as plain floats (see docstring).
data = pdr.get_data_yahoo(ticker, start_date, end_date)
prices = [float(p) for p in data.Close.tolist()]


class DecisionPolicy:
    """Abstract trading policy: pick an action, optionally learn from reward."""

    def select_action(self, current_state, step):
        pass

    def update_q(self, state, action, reward, next_state):
        pass


class RandomDecisionPolicy(DecisionPolicy):
    """Baseline policy: choose a uniformly random action, never learns."""

    def __init__(self, actions):
        self.actions = actions

    def select_action(self, current_state, step):
        return self.actions[random.randint(0, len(self.actions) - 1)]


class QLearningDecisionPolicy(DecisionPolicy):
    """Epsilon-greedy Q-learning policy over a 2-layer ReLU network.

    Written against the TF1 graph/session API, matching the original file.

    Parameters
    ----------
    actions : list[str]
        Action labels, e.g. ['Buy', 'Sell', 'Hold'].
    input_dim : int
        State vector length (hist prices + budget + num_stocks).
    """

    def __init__(self, actions, input_dim):
        self.epsilon = .5          # cap on the exploitation probability
        self.gamma = .001          # discount factor for the Q target
        self.actions = actions
        output_dim = len(actions)
        h1_dim = 200               # hidden-layer width

        self.sess = tf.Session()
        self.x = tf.placeholder(tf.float32, [None, input_dim])
        self.y = tf.placeholder(tf.float32, [output_dim])
        W1 = tf.Variable(tf.random_normal([input_dim, h1_dim]))
        b1 = tf.Variable(tf.constant(0.1, shape=[h1_dim]))
        h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)
        W2 = tf.Variable(tf.random_normal([h1_dim, output_dim]))
        b2 = tf.Variable(tf.constant(.1, shape=[output_dim]))
        self.q = tf.nn.relu(tf.matmul(h1, W2) + b2)

        loss = tf.square(self.y - self.q)
        self.train_op = tf.train.GradientDescentOptimizer(.01).minimize(loss)
        # BUG FIX: without this, the first sess.run() raises
        # "Attempting to use uninitialized value".
        self.sess.run(tf.global_variables_initializer())

    def select_action(self, current_state, step):
        # Exploit (argmax of Q) with probability `threshold`, which ramps up
        # with `step` and is capped at epsilon; otherwise explore randomly.
        threshold = min(self.epsilon, step / 1000.)
        if random.random() < threshold:
            action_q_vals = self.sess.run(self.q,
                                          feed_dict={self.x: current_state})
            action = self.actions[np.argmax(action_q_vals)]
        else:
            action = self.actions[random.randint(0, len(self.actions) - 1)]
        # BUG FIX: the original fell off the end here, returning None.
        return action

    def update_q(self, state, action, reward, next_state):
        # BUG FIX: the base-class no-op was never overridden, so the
        # network never trained.  One-step Q-learning target:
        #   Q(s, a) <- r + gamma * max_a' Q(s', a')
        action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
        next_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
        action_q_vals[0, self.actions.index(action)] = (
            reward + self.gamma * np.max(next_q_vals))
        self.sess.run(self.train_op,
                      feed_dict={self.x: state,
                                 self.y: np.squeeze(np.asarray(action_q_vals))})


def run_simulation(policy, initial_budget, initial_num_stocks, prices, hist,
                   debug=False):
    """Trade once through the price series using `policy`.

    Returns the final portfolio value (cash + shares * last share price).
    """
    budget = initial_budget
    num_stocks = initial_num_stocks
    share_value = 0
    transitions = list()
    for i in range(len(prices) - hist - 1):
        # BUG FIX: this `if` previously had only a commented-out print as
        # its body -> IndentationError when the file was properly indented.
        if i % 100 == 0:
            print('progress {:.2f}%'.format(
                float(100 * i) / (len(prices) - hist - 1)))
        # State = window of `hist` prices plus current budget and holdings.
        current_state = np.asmatrix(
            np.hstack((prices[i:i + hist], budget, num_stocks)))
        current_portfolio = budget + num_stocks * share_value
        action = policy.select_action(current_state, i)
        share_value = float(prices[i + hist + 1])
        if action == 'Buy' and budget >= share_value:
            budget -= share_value
            num_stocks += 1
        elif action == 'Sell' and num_stocks > 0:
            budget += share_value
            num_stocks -= 1
        else:
            action = 'Hold'
        # Reward is the change in mark-to-market portfolio value.
        new_portfolio = budget + num_stocks * share_value
        reward = new_portfolio - current_portfolio
        next_state = np.asmatrix(
            np.hstack((prices[i + 1:i + hist + 1], budget, num_stocks)))
        transitions.append((current_state, action, reward, next_state))
        policy.update_q(current_state, action, reward, next_state)
    portfolio = budget + num_stocks * share_value
    if debug:
        # BUG FIX: format string read '...}t{...' — 't' was a mistyped tab.
        print('${}\t{} shares'.format(budget, num_stocks))
    return portfolio


def run_simulations(policy, budget, num_stocks, prices, hist):
    """Average the final portfolio over several independent simulations."""
    num_tries = 100
    final_portfolios = list()
    for _ in range(num_tries):
        final_portfolios.append(
            run_simulation(policy, budget, num_stocks, prices, hist))
    return np.mean(final_portfolios), np.std(final_portfolios)


actions = ['Buy', 'Sell', 'Hold']
hist = 200
# input_dim = hist prices + budget + num_stocks = hist + 2 (was hard-coded 202).
policy = QLearningDecisionPolicy(actions, input_dim=hist + 2)
budget = 1000.0
num_stocks = 0
avg, std = run_simulations(policy, budget, num_stocks, prices, hist)
print(avg, std)