Python Forum

Full Version: Errors with Machine Learning trading bot-- not sure why
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
I keep getting errors without any traceback.
# Import pandas datareader
import pandas_datareader
pandas_datareader.__version__
import pandas as pd
from pandas_datareader import data
import numpy as np
import random as random
import tensorflow as tf

# Set the start and end date
start_date = '2017-01-01'
end_date = '2019-02-01'
# Set the ticker
ticker = 'AMZN'
# Get the data
data = data.get_data_yahoo(ticker, start_date, end_date)




#print(data.head())
pricesdf = data.Close.to_string(index=False)
prices = (list(pricesdf.split()))[1:]
#print(prices)   



class DecisionPolicy:
    def select_action(self, current_state, step):
        pass
    def update_q(self, state, action, reward, next_state):
        pass

class RandomDecisionPolicy(DecisionPolicy):
    def __init__(self, actions):
        self.actions = actions
        
    def select_action(self, current_state, step):
        action = self.actions[random.randint(0, len(self.actions) -1)]
        #print(action)
        return action
class QLearningDecisionPolicy(DecisionPolicy):
    def __init__(self, actions, input_dim):
        self.epsilon = .5
        self.gamma = .001
        self.actions = actions
        output_dim = len(actions)
        h1_dim = 200
        self.sess = tf.Session(target='', graph=None, config=None)
        self.x = tf.placeholder(tf.float32, [None, input_dim])
        self.y = tf.placeholder(tf.float32, [output_dim])
        W1 = tf.Variable(tf.random_normal([input_dim,h1_dim]))
        b1= tf.Variable(tf.constant(0.1,shape=[h1_dim]))
        h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)
        W2 = tf.Variable(tf.random_normal([h1_dim,output_dim]))
        b2 = tf.Variable(tf.constant(.1, shape=[output_dim]))
        self.q = tf.nn.relu(tf.matmul(h1,W2) +b2)
        loss = tf.square(self.y - self.q)
        self.train_op = tf.train.GradientDescentOptimizer(.01).minimize(loss)
    def select_action(self,current_state, step):
        threshold = min(self.epsilon, step / 1000.)
        if random.random() < threshold:
            action_q_vals = self.sess.run(self.q,feed_dict={self.x: current_state})
            action_idx = np.argmax(action_q_vals)
            action = self.actions[action_idx]
        else:
            action = self.actions[random.randint(0,len(self.actions)-1)]


def run_simulation(policy, initial_budget, initial_num_stocks, prices, hist, debug=False):

    budget = initial_budget

    num_stocks = initial_num_stocks

    share_value = 0

    
    transitions = list()

    
    

    for i in range(len(prices) - hist - 1):

        

        if i % 100 == 0:

            #print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))
            

            current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))

            current_portfolio = budget + num_stocks * share_value

            action = policy.select_action(current_state, i)

            share_value = float(prices[i + hist + 1])

            if action == 'Buy' and budget >= share_value:

                 budget -= share_value

                 num_stocks += 1

            elif action == 'Sell' and num_stocks > 0:

                budget += share_value

                num_stocks -= 1

            else:

                action = 'Hold'

            new_portfolio = budget + num_stocks * share_value

            reward = new_portfolio - current_portfolio

            next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1],

            budget, num_stocks)))

            transitions.append((current_state, action, reward, next_state))

            policy.update_q(current_state, action, reward, next_state)

            portfolio = budget + num_stocks * share_value

    if debug:

     print('${}t{} shares'.format(budget, num_stocks))

    return portfolio

def run_simulations(policy, budget, num_stocks, prices, hist):
    num_tries = 100
    final_portfolios = list()
    for i in range(num_tries):
        final_portfolio = run_simulation(policy, budget, num_stocks, prices, hist)
        final_portfolios.append(final_portfolio)
    avg, std = np.mean(final_portfolios), np.std(final_portfolios)
    return avg, std

actions = ['Buy', 'Sell', 'Hold']

hist = 200

policy = QLearningDecisionPolicy(actions, input_dim = 202)

budget = 1000.0

num_stocks = 0

avg,std=run_simulations(policy,budget,num_stocks,prices, hist)

print(avg, std)