Gradient Descent [梯度下降法]

A first look at the gradient descent algorithm, and an analysis of its strengths and weaknesses, so as to make better use of it.

Introduction (wiki)

Description
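In brief, following the standard definition on the wiki: to minimize a differentiable function $F$, start from an initial point $\theta_0$ and repeatedly step in the direction of the negative gradient,

$$\theta_{n+1} = \theta_n - \gamma \nabla F(\theta_n),$$

where the step size $\gamma$ is the learning rate. For a sufficiently small $\gamma$ this gives $F(\theta_{n+1}) \le F(\theta_n)$, so the iterates walk downhill toward a (local) minimum.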



Application to Linear Regression

Reference: here


import numpy as np
from matplotlib import pyplot as plt

# y = mx + b
# m is slope, b is y-intercept
def compute_error_for_line_given_points(b, m, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (m * x + b)) ** 2
    return totalError / float(len(points))

def step_gradient(b_current, m_current, points, learningRate):
    b_gradient = 0
    m_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += -(2 / N) * (y - ((m_current * x) + b_current))
        m_gradient += -(2 / N) * x * (y - ((m_current * x) + b_current))
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]

def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    b = starting_b
    m = starting_m
    for i in range(num_iterations):
        b, m = step_gradient(b, m, np.array(points), learning_rate)
    return [b, m]

def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0  # initial y-intercept guess
    initial_m = 0  # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(
        initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))
    print("Running...")
    b, m = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(
        num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))

    # Plot the data points and the fitted line (just to visualize the fit)
    plt.plot(points[:, 0], points[:, 1], 'bo')
    x = np.linspace(points[:, 0].min() - 5, points[:, 0].max() + 5, 1000)
    plt.plot(x, m * x + b)
    plt.show()

if __name__ == '__main__':
    run()
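For reference, the -(2/N) factors in step_gradient are exactly the partial derivatives of the mean squared error that compute_error_for_line_given_points measures:

$$E(m, b) = \frac{1}{N}\sum_{i=1}^{N}\bigl(y_i - (m x_i + b)\bigr)^2$$

$$\frac{\partial E}{\partial b} = -\frac{2}{N}\sum_{i=1}^{N}\bigl(y_i - (m x_i + b)\bigr), \qquad
\frac{\partial E}{\partial m} = -\frac{2}{N}\sum_{i=1}^{N}x_i\bigl(y_i - (m x_i + b)\bigr)$$

Each call to step_gradient accumulates these two sums over the data and then moves (b, m) one step of size learning_rate against them.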


Output:

Drawbacks of Gradient Descent, and a Verification Attempt

An attempt to reproduce the figure below from the wiki:

Running with different learning rates, the results are as follows:

E1: Learning Rate = 0.0045

E2: Learning Rate = 0.0025

There is still a clear difference from the wiki figure; I suspect it comes down to the learning rate and the number of iterations, and leave a deeper investigation for later.

The Python source is as follows:

#!/usr/bin/env python3
# encoding: utf-8

from matplotlib import pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import sys

# The descent is implemented recursively, one call per gradient step;
# raise the recursion limit so it can iterate long enough.
sys.setrecursionlimit(2000)

# Value of the Rosenbrock function
def func(X, Y):
    return (1 - X) ** 2 + 100 * ((Y - X ** 2) ** 2)

def step_gradient(pre, learningRate=0.0025):
    # Record the current point, then step against the gradient.
    scatter.append(pre)
    x = pre[0]
    y = pre[1]
    x_gradient = 2 * (x - 1) + 400 * x * (x ** 2 - y)
    y_gradient = 200 * (y - x ** 2)
    now = [x - learningRate * x_gradient, y - learningRate * y_gradient]
    z_l.append(func(now[0], now[1]))
    try:
        # The new point is recorded by the next recursive call.
        step_gradient(now, learningRate)
    except RecursionError:
        pass

if __name__ == '__main__':
    fig = plt.figure()
    # ax = Axes3D(fig)
    # Starting point of the descent
    point = (-0.5, 0.5)
    scatter = []
    z_l = []
    step_gradient(point)
    print('Total times', len(scatter))
    print("Min_z-->", min(z_l))
    plt.plot([p[0] for p in scatter], [p[1] for p in scatter], 'r')

    X = np.arange(-0.8, 1, 0.01)
    Y = np.arange(-0.2, 1.1, 0.025)
    X, Y = np.meshgrid(X, Y)
    Z = func(X, Y)

    # ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
    # plt.show()
    CS = plt.contour(X, Y, Z, 100)
    manual_locations = [(i, i + 0.02) for i in np.linspace(-0.2, 0.3, 10)]
    manual_locations.append((1, 1))

    plt.clabel(CS, inline=1, fontsize=8, manual=manual_locations)
    plt.title('Rosenbrock')
    plt.show()
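The hand-coded x_gradient and y_gradient are the exact partial derivatives of the Rosenbrock function:

$$f(x, y) = (1 - x)^2 + 100\,(y - x^2)^2$$

$$\frac{\partial f}{\partial x} = 2(x - 1) + 400\,x\,(x^2 - y), \qquad
\frac{\partial f}{\partial y} = 200\,(y - x^2)$$

The global minimum is $f(1, 1) = 0$, which is why one contour label is pinned near $(1, 1)$.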



With suitable parameter tuning, the plot below can also be produced:
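Here, "tuning the parameters" mainly means changing the starting point and the learningRate default. A minimal sketch of such a sweep, reusing func and step_gradient from the script above; the specific rates and starting points below are illustrative, not the ones behind the original figure:

# Hypothetical parameter sweep; reuses step_gradient and the global
# trajectory buffers from the script above.
for lr in (0.001, 0.0025, 0.0045):            # illustrative learning rates
    for start in [(-0.5, 0.5), (0.0, 1.0)]:   # illustrative starting points
        scatter, z_l = [], []                 # reset the trajectory buffers
        step_gradient(start, learningRate=lr)
        print(lr, start, 'steps:', len(scatter), 'min z:', min(z_l))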

First addendum: [a failed attempt to zoom in on the plot to find the supposed zigzag]
Since the zigzag oscillation from the wiki never showed up, I guessed that the points might be so dense that tiny bends were being hidden, so I tried magnifying a local region of the plot.


#!/usr/bin/env python3
# encoding: utf-8

from matplotlib import pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import sys

# The descent is implemented recursively, one call per gradient step;
# raise the recursion limit so it can iterate long enough.
sys.setrecursionlimit(2000)

# Value of the Rosenbrock function
def func(X, Y):
    return (1 - X) ** 2 + 100 * ((Y - X ** 2) ** 2)

def step_gradient(pre, learningRate=0.0045):
    # Record the current point, then step against the gradient.
    scatter.append(pre)
    x = pre[0]
    y = pre[1]
    x_gradient = 2 * (x - 1) + 400 * x * (x ** 2 - y)
    y_gradient = 200 * (y - x ** 2)
    now = [x - learningRate * x_gradient, y - learningRate * y_gradient]
    z_l.append(func(now[0], now[1]))
    try:
        # The new point is recorded by the next recursive call.
        step_gradient(now, learningRate)
    except RecursionError:
        pass

if __name__ == '__main__':

    fig = plt.figure(figsize=(16, 8), dpi=98)
    p1 = plt.subplot(121, aspect=1.8 / 1.3)
    p2 = plt.subplot(122, aspect=0.06 / 0.001)

    # Starting point of the descent
    point = (-0.5, 0.5)
    scatter = []
    z_l = []
    step_gradient(point)
    print('Total times', len(scatter))
    print("Min_z-->", min(z_l))
    # Full trajectory on the left; zoomed-in region on the right
    p1.plot([p[0] for p in scatter], [p[1] for p in scatter], 'r')

    p2.plot([p[0] for p in scatter], [p[1] for p in scatter], 'r')
    p2.axis([-0.02, 0.04, -0.0005, 0.0005])

    X = np.arange(-0.8, 1, 0.01)
    Y = np.arange(-0.2, 1.1, 0.025)
    X, Y = np.meshgrid(X, Y)
    Z = func(X, Y)

    CS = p1.contour(X, Y, Z, 100)
    manual_locations = [(i, i + 0.02) for i in np.linspace(-0.2, 0.3, 10)]
    manual_locations.append((1, 1))
    p1.clabel(CS, inline=1, fontsize=8, manual=manual_locations)

    plt.show()
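The recursion (and the sys.setrecursionlimit workaround) is not essential; a plain loop does the same job and can stop on a convergence tolerance instead of a stack limit. A minimal iterative sketch under the same setup; the tolerance and iteration cap here are my choices, not from the original:

def gradient_descent(start, learning_rate=0.0045, max_iters=2000, tol=1e-12):
    # Iterative version of step_gradient: no recursion limit needed.
    path = [start]
    x, y = start
    for _ in range(max_iters):
        x_grad = 2 * (x - 1) + 400 * x * (x ** 2 - y)
        y_grad = 200 * (y - x ** 2)
        x, y = x - learning_rate * x_grad, y - learning_rate * y_grad
        path.append((x, y))
        if x_grad ** 2 + y_grad ** 2 < tol:  # stop once the gradient vanishes
            break
    return path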

The result is shown below [clearly the experiment failed :-)]

击蒙御寇