다운로드
작성자: admin 작성일시: 2018-04-14 13:09:51 조회수: 590 다운로드: 68
카테고리: 기타 태그목록:

그레디언트 소멸 문제 (The Vanishing Gradient Problem)

In [2]:
import numpy as np

from keras.datasets import mnist
from keras.utils import np_utils

(X_train0, y_train0), (X_test0, y_test0) = mnist.load_data()

X_train = X_train0.reshape(60000, 784).astype('float32') / 255.0
X_test = X_test0.reshape(10000, 784).astype('float32') / 255.0
Y_train = np_utils.to_categorical(y_train0, 10)
Y_test = np_utils.to_categorical(y_test0, 10)
In [3]:
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
In [4]:
model1 = Sequential()
model1.add(Dense(15, input_dim=784, activation="sigmoid"))
model1.add(Dense(10, activation="sigmoid"))
model1.compile(optimizer=SGD(lr=0.2), loss='mean_squared_error', metrics=["accuracy"])
In [5]:
model2 = Sequential()
model2.add(Dense(15, input_dim=784, activation="sigmoid"))
model2.add(Dense(15, activation="sigmoid"))
model2.add(Dense(10, activation="sigmoid"))
model2.compile(optimizer=SGD(lr=0.2), loss='mean_squared_error', metrics=["accuracy"])
In [6]:
model3 = Sequential()
model3.add(Dense(15, input_dim=784, activation="sigmoid"))
model3.add(Dense(15, activation="sigmoid"))
model3.add(Dense(15, activation="sigmoid"))
model3.add(Dense(10, activation="sigmoid"))
model3.compile(optimizer=SGD(lr=0.2), loss='mean_squared_error', metrics=["accuracy"])
In [7]:
model4 = Sequential()
model4.add(Dense(15, input_dim=784, activation="sigmoid"))
model4.add(Dense(15, activation="sigmoid"))
model4.add(Dense(15, activation="sigmoid"))
model4.add(Dense(15, activation="sigmoid"))
model4.add(Dense(10, activation="sigmoid"))
model4.compile(optimizer=SGD(lr=0.2), loss='mean_squared_error', metrics=["accuracy"])
In [8]:
from keras.callbacks import Callback

class WeightHistory(Callback):
    
    def __init__(self, model, num_epoch):
        super().__init__()
        self.model = model
        self.num_hidden = len(model.layers) - 1
        self.weight_old = [self.model.layers[i].get_weights()[0] for i in range(self.num_hidden)]
        self.weight = [0.0 for i in range(self.num_hidden)]
        self.weight_change = np.zeros((self.num_hidden, num_epoch))
        
    def on_epoch_end(self, epoch, logs={}):
        for i in range(self.num_hidden):
            self.weight[i] = self.model.layers[i].get_weights()[0]
            self.weight_change[i, epoch] = np.abs((self.weight[i] - self.weight_old[i]).mean())
            self.weight_old[i] = self.weight[i]
            
In [9]:
num_epoch = 500
In [10]:
%%time
callback1 = WeightHistory(model1, num_epoch)
hist1 = model1.fit(X_train, Y_train, epochs=num_epoch, batch_size=100, callbacks=[callback1], verbose=0)
CPU times: user 40min 55s, sys: 1h 1min 9s, total: 1h 42min 5s
Wall time: 34min 10s
In [82]:
plt.subplot(2, 1, 1)
plt.plot(hist1.history['acc'])
plt.ylabel("Accuracy")
plt.subplot(2, 1, 2)
plt.plot(callback1.weight_change[0], label="hidden 1")
plt.xlabel("epoch")
plt.ylabel("Average Weight Change")
plt.legend()
plt.show()
In [99]:
hist1.history['acc'][-1]
Out:
0.942866667509079
In [83]:
%%time
callback2 = WeightHistory(model2, num_epoch)
hist2 = model2.fit(X_train, Y_train, epochs=num_epoch, batch_size=100, callbacks=[callback2], verbose=0)
CPU times: user 28min 35s, sys: 2min 42s, total: 31min 17s
Wall time: 14min 57s
In [84]:
plt.subplot(2, 1, 1)
plt.plot(hist2.history['acc'])
plt.ylabel("Accuracy")
plt.subplot(2, 1, 2)
plt.plot(callback2.weight_change[0], 'r', label="hidden 1")
plt.plot(callback2.weight_change[1], 'g', label="hidden 2")
plt.xlabel("epoch")
plt.ylabel("Average Weight Change")
plt.ylim(0, np.max(callback2.weight_change[-1][5:]))
plt.legend()
plt.show()
In [98]:
hist2.history['acc'][-1]
Out:
0.9475666667024295
In [85]:
%%time
callback3 = WeightHistory(model3, num_epoch)
hist3 = model3.fit(X_train, Y_train, epochs=num_epoch, batch_size=100, callbacks=[callback3], verbose=0)
CPU times: user 29min 59s, sys: 2min 56s, total: 32min 55s
Wall time: 16min 17s
In [86]:
plt.subplot(2, 1, 1)
plt.plot(hist3.history['acc'])
plt.ylabel("Accuracy")
plt.subplot(2, 1, 2)
plt.plot(callback3.weight_change[0], 'r', label="hidden 1")
plt.plot(callback3.weight_change[1], 'g', label="hidden 2")
plt.plot(callback3.weight_change[2], 'b', label="hidden 3")
plt.xlabel("epoch")
plt.ylabel("Average Weight Change")
plt.ylim(0, np.max(callback3.weight_change[-1][5:]))
plt.legend()
plt.show()
In [97]:
hist3.history['acc'][-1]
Out:
0.9509166686733563
In [87]:
%%time
callback4 = WeightHistory(model4, num_epoch)
hist4 = model4.fit(X_train, Y_train, epochs=num_epoch, batch_size=100, callbacks=[callback4], verbose=0)
CPU times: user 31min 11s, sys: 3min 10s, total: 34min 21s
Wall time: 17min 26s
In [93]:
plt.subplot(2, 1, 1)
plt.plot(hist4.history['acc'])
plt.ylabel("Accuracy")
plt.subplot(2, 1, 2)
plt.plot(callback4.weight_change[0], 'r', label="hidden 1")
plt.plot(callback4.weight_change[1], 'g', label="hidden 2")
plt.plot(callback4.weight_change[2], 'b', label="hidden 3")
plt.plot(callback4.weight_change[3], 'k', label="hidden 4")
plt.xlabel("epoch")
plt.ylabel("Average Weight Change")
plt.ylim(0, np.max(callback4.weight_change[-1][5:]))
plt.legend()
plt.show()