Dec-08-2017, 05:09 PM

I am trying to implement a gradient descent algorithm for linear regression. I am using the attached data. My algorithm is shown below:

import csv

import numpy as np


def load_data(path=''):
    """Read the two-column CSV 'ex1data1.txt' (population, profit).

    Returns two parallel lists of floats: (population, profit).
    """
    population, profit = [], []
    with open(path + 'ex1data1.txt', 'r') as f:
        for row in csv.reader(f):
            population.append(float(row[0]))
            profit.append(float(row[1]))
    return population, profit


def gradientDescent(xData, yData, a, iterations, theta0, theta1):
    """Fit y = theta0 + theta1*x to (xData, yData) by batch gradient descent.

    Parameters:
        xData, yData: sequences of floats (same length, non-empty).
        a: learning rate.
        iterations: number of full-batch update steps.
        theta0, theta1: initial intercept and slope.

    Returns:
        (theta0, theta1, J, it) where J[i] is the mean-squared-error cost
        after step i and it[i] == i (handy for plotting J against iteration).

    Note: the original posted code accumulated the *squared* residual
    (h - y)**2 as the "gradient", which is the cost term, not its
    derivative. The correct partial derivatives of the cost
    J = (1/2m) * sum((h - y)^2) are:
        dJ/dtheta0 = (1/m) * sum(h - y)
        dJ/dtheta1 = (1/m) * sum((h - y) * x)
    """
    m = len(xData)
    J = []
    it = []
    for i in range(iterations):
        grad0 = 0.0
        grad1 = 0.0
        cost = 0.0
        for x, y in zip(xData, yData):
            err = theta0 + theta1 * x - y  # residual h(x) - y
            grad0 += err
            grad1 += err * x
            cost += err ** 2
        # Simultaneous update of both parameters, scaled by 1/m so the
        # learning rate is independent of the dataset size.
        theta0 -= a * grad0 / m
        theta1 -= a * grad1 / m
        J.append(cost / (2 * m))  # standard 1/(2m) mean-squared-error cost
        it.append(i)
    return theta0, theta1, J, it


def main():
    """Load the data, run gradient descent, and plot cost curve and fit."""
    # Imported here so gradientDescent stays importable without a display
    # backend (e.g. for testing on a headless machine).
    import matplotlib.pyplot as plt

    population, profit = load_data()
    # With the corrected, m-normalized gradient a conventional learning
    # rate works; the original needed 5e-9 only because its "gradient"
    # was the unnormalized squared error.
    result = gradientDescent(population, profit, 0.01, 8000, 1, 2)
    print('y = %s + %sx' % (result[0], result[1]))

    # Cost vs. iteration: should decrease monotonically toward a plateau.
    plt.plot(result[3], result[2])
    plt.show()

    # Fitted line over the scatter of the raw data.
    x = np.arange(0, 30, 1)
    y = result[0] + result[1] * x
    plt.plot(x, y)
    plt.plot(population, profit, 'rx')
    plt.show()


if __name__ == '__main__':
    main()