level 1
mon-key
楼主
用梯度带进行训练,loss一直在0.68左右浮动,用.fit()方法就下降很快
数据集是用的imdb
fit方法训练结果:
Epoch 10/10
20/20 [==============================] - 0s 3ms/sample - loss: 0.2161 - accuracy: 0.9500 - val_loss: 1.1142 - val_accuracy: 0.6000
梯度带GradientTape训练结果:
steps: 480, losses: 0.693433
val loss 0.69323194
steps: 490, losses: 0.693890
val loss 0.69317454
训练模型:
def lstm():
    """Build a two-layer LSTM binary classifier for 128-token sequences.

    Returns a `keras.Model` mapping integer token ids of shape (batch, 128)
    to a sigmoid probability of shape (batch, 1).
    """
    tokens = keras.Input(shape=(128,))
    emb = keras.layers.Embedding(vocab_size, 128)(tokens)
    seq = keras.layers.LSTM(128, return_sequences=True)(emb)
    seq = keras.layers.Dropout(0.3)(seq)
    last_state = keras.layers.LSTM(128)(seq)
    prob = keras.layers.Dense(1, activation='sigmoid')(last_state)
    return keras.Model(tokens, prob)
梯度带:
# Custom training loop with tf.GradientTape.
#
# Fixes versus the pasted version:
#   1. `ll(task1_data` was missing its closing parenthesis (syntax error).
#   2. Labels from `next_batch` have shape (batch,) while the model outputs
#      (batch, 1). Passing them straight into `binary_crossentropy` broadcasts
#      to a (batch, batch) matrix, so the loss is meaningless and sits at
#      ~0.693 (= ln 2) forever — this is why .fit() worked but the tape loop
#      did not. Labels are now reshaped to (batch, 1) to match the output.
#   3. `training=True` is passed in the forward pass so Dropout is active
#      (matching what .fit() does), and `training=False` for validation.
#   4. Gradients are taken w.r.t. `trainable_variables`, not `variables`.
ll = lstm()
optimizer = tf.keras.optimizers.Adam()
# ll.compile(optimizer='Adam', loss=tf.losses.binary_crossentropy, metrics=['accuracy'])
for i in range(10):  # epochs
    print(i)
    for step in range(int(vocab_size / batch_size)):  # batches per epoch
        task1_data = next_batch(x1_train, step, batch_size)
        task2_data = next_batch(x2_train, step, batch_size)
        task1_labels = next_batch(y1_train, step, batch_size)
        task2_labels = next_batch(y2_train, step, batch_size)
        # .fit() equivalent kept for comparison:
        # history = ll.fit(task1_data, task1_labels, epochs=10, batch_size=20,
        #                  validation_data=(task2_data, task2_labels), verbose=1)
        # results = ll.evaluate(test_data, test_labels)
        # print(results)
        with tf.GradientTape() as tape:
            out = ll(task1_data, training=True)
            # Reshape labels to (batch, 1) so they align with `out` instead
            # of broadcasting — see fix #2 above.
            labels = tf.reshape(task1_labels, (-1, 1))
            loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(labels, out))
        grad = tape.gradient(loss, ll.trainable_variables)
        optimizer.apply_gradients(zip(grad, ll.trainable_variables))
        y_pred = ll(task2_data, training=False)
        val_labels = tf.reshape(task2_labels, (-1, 1))
        val_loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(val_labels, y_pred))
        if step % 10 == 0:
            print('steps: %d, losses: %f' % (step, loss.numpy()))
            print('val loss', val_loss.numpy())
求教问题出在哪里了
2020年03月01日 02点03分
1
数据集是用的imdb
fit方法训练结果:
Epoch 10/10
20/20 [==============================] - 0s 3ms/sample - loss: 0.2161 - accuracy: 0.9500 - val_loss: 1.1142 - val_accuracy: 0.6000
梯度带GradientTape训练结果:
steps: 480, losses: 0.693433
val loss 0.69323194
steps: 490, losses: 0.693890
val loss 0.69317454
训练模型:
def lstm():
    """Two-layer LSTM binary classifier over fixed-length (128) token ids.

    Output is a single sigmoid unit, i.e. P(positive) per example.
    """
    inp = keras.Input(shape=(128,))
    x = keras.layers.Embedding(vocab_size, 128)(inp)
    x = keras.layers.LSTM(128, return_sequences=True)(x)
    x = keras.layers.Dropout(0.3)(x)
    x = keras.layers.LSTM(128)(x)
    out = keras.layers.Dense(1, activation='sigmoid')(x)
    return keras.Model(inp, out)
梯度带:
# GradientTape training loop, corrected.
#
# Root cause of the "loss stuck at 0.693" symptom: the model emits shape
# (batch, 1) but the labels are shape (batch,). Inside binary_crossentropy the
# two broadcast to (batch, batch), so the computed loss/gradients are garbage
# and the loss hovers at ln(2) ≈ 0.693. .fit() handles this alignment
# internally, which is why it trained fine. Additional fixes: the unclosed
# `ll(task1_data` call, missing `training=` flags (Dropout never active), and
# using `variables` instead of `trainable_variables`.
ll = lstm()
optimizer = tf.keras.optimizers.Adam()
# ll.compile(optimizer='Adam', loss=tf.losses.binary_crossentropy, metrics=['accuracy'])
for i in range(10):  # epochs
    print(i)
    for step in range(int(vocab_size / batch_size)):  # batches per epoch
        task1_data = next_batch(x1_train, step, batch_size)
        task2_data = next_batch(x2_train, step, batch_size)
        task1_labels = next_batch(y1_train, step, batch_size)
        task2_labels = next_batch(y2_train, step, batch_size)
        # .fit() version kept for reference:
        # history = ll.fit(task1_data, task1_labels, epochs=10, batch_size=20,
        #                  validation_data=(task2_data, task2_labels), verbose=1)
        # results = ll.evaluate(test_data, test_labels)
        # print(results)
        with tf.GradientTape() as tape:
            out = ll(task1_data, training=True)  # Dropout active in training
            # Align label shape with model output to prevent broadcasting.
            labels = tf.reshape(task1_labels, (-1, 1))
            loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(labels, out))
        grad = tape.gradient(loss, ll.trainable_variables)
        optimizer.apply_gradients(zip(grad, ll.trainable_variables))
        y_pred = ll(task2_data, training=False)  # inference mode for validation
        val_labels = tf.reshape(task2_labels, (-1, 1))
        val_loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(val_labels, y_pred))
        if step % 10 == 0:
            print('steps: %d, losses: %f' % (step, loss.numpy()))
            print('val loss', val_loss.numpy())
求教问题出在哪里了