笑而不语的谁
Loss quickly drops to 0: how do I fix this?

Roughly, my setup is this: I built a deep network with five conv blocks and two hidden FC layers (plus a 100-way output) to learn CIFAR-100, training for 50 epochs with the Adam optimizer (lr=1e-4 in the code below). I split the 60k images into 50k for training and 10k for testing, and further split the 50k into train_data and validation_data at a 9:1 ratio. When I did not carve out validation data, everything ran smoothly; but once I started randomly shuffling the data into train and validation splits at the start of every epoch, the trouble began: by epoch 2 the accuracy on the validation data is already hitting 100%, yet the final accuracy on the test data is only 1%. I'm a complete beginner and can't find where the bug is; I'd be grateful if anyone can point it out.

The code is below (I'm running it on Colab):

import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential
import os

tf.random.set_seed(2345)

conv_layers = [
    # 5 units of conv + max pooling
    # unit 1
    # 64 means: every conv layer in this unit has 64 kernels
    layers.Conv2D(64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 2
    layers.Conv2D(128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 3
    layers.Conv2D(256, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 4
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 5
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
]

fc_layers = [
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dense(100, activation=None),
]

def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.  # scale pixels to [0, 1]
    y = tf.cast(y, dtype=tf.int32)
    return x, y

# load data
# x: [50k, 32, 32, 3], x_test: [10k, 32, 32, 3]
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)

test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(64)

# [b, 32, 32, 3] => [b, 1, 1, 512]
conv_net = Sequential(conv_layers)
conv_net.build(input_shape=[None, 32, 32, 3])
fc_net = Sequential(fc_layers)
fc_net.build(input_shape=[None, 512])

optimizer = optimizers.Adam(lr=1e-4)

# combine the variables of the conv net and fully connected net
variables = conv_net.trainable_variables + fc_net.trainable_variables

for epoch in range(50):
    # reshuffle and re-split into train/validation at the start of every epoch
    idx = tf.range(50000)
    idx = tf.random.shuffle(idx)
    # train_num : validation_num => 0.9 : 0.1
    x_train, y_train = tf.gather(x, idx[:45000]), tf.gather(y, idx[:45000])
    x_val, y_val = tf.gather(x, idx[-5000:]), tf.gather(y, idx[-5000:])
    print('train data', x_train.shape, y_train.shape,
          'validation data', x_val.shape, y_val.shape)

    train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_db = train_db.map(preprocess).shuffle(10000).batch(64)
    val_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    val_db = val_db.map(preprocess).shuffle(10000).batch(64)

    for step, (x, y) in enumerate(train_db):
        with tf.GradientTape() as tape:
            # [b, 32, 32, 3] => [b, 1, 1, 512]
            out = conv_net(x)
            # [b, 1, 1, 512] => [b, 512], flatten
            out = tf.reshape(out, [-1, 512])
            # [b, 512] => [b, 100]
            logits = fc_net(out)
            # y: [b,] => [b, 100]
            y_onehot = tf.one_hot(y, depth=100)
            # compute loss
            loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
            loss = tf.reduce_mean(loss)

        grads = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(grads, variables))

        if step % 100 == 0:
            print('epoch:', epoch, 'step:', step, 'loss:', float(loss))

    # validation part
    total_num = 0
    total_correct = 0
    for x, y in val_db:
        out = conv_net(x)
        out = tf.reshape(out, [-1, 512])
        logits = fc_net(out)
        prob = tf.nn.softmax(logits, axis=1)
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype=tf.int32)
        correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
        correct = tf.reduce_sum(correct)
        total_num += x.shape[0]
        total_correct += int(correct)
    acc = total_correct / total_num
    print('in validation data', 'epoch:', epoch, 'acc=', acc)

# use test data to do the prediction
total_num = 0
total_correct = 0
for x, y in test_db:
    out = conv_net(x)
    out = tf.reshape(out, [-1, 512])
    logits = fc_net(out)
    prob = tf.nn.softmax(logits, axis=1)
    pred = tf.argmax(prob, axis=1)
    pred = tf.cast(pred, dtype=tf.int32)
    correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
    correct = tf.reduce_sum(correct)
    total_num += x.shape[0]
    total_correct += int(correct)
acc = total_correct / total_num
print('in test data', 'acc=', acc)
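Reading the code above, the most likely culprit is name shadowing: for step, (x, y) in enumerate(train_db) and for x, y in val_db rebind x and y, which are also the names holding the full 50k CIFAR-100 arrays. After the first epoch, x and y contain only the last small evaluation batch, so in epoch 2 tf.gather(x, idx[:45000]) indexes a tensor of a handful of rows with indices as large as 49999. On a GPU runtime, tf.gather does not raise on out-of-bounds indices; per the TensorFlow documentation it stores 0 at the corresponding output positions (on CPU it raises InvalidArgumentError). From the second epoch on, the network is therefore trained and validated on almost entirely all-zero images with label 0: it fits those instantly (loss drops to 0, validation accuracy hits 100%), while on the real test set it predicts a single class, giving roughly 1/100 = 1% accuracy. A small, self-contained check of the documented gather behavior:

import tensorflow as tf

params = tf.constant([10.0, 20.0, 30.0])
# Index 5 is out of range for a 3-element tensor.
# On CPU this raises InvalidArgumentError; on a GPU runtime tf.gather
# instead stores 0 at the out-of-bounds position (documented behavior),
# so it would print [10., 0.] without any error.
try:
    print(tf.gather(params, [0, 5]))
except tf.errors.InvalidArgumentError as e:
    print('CPU raises:', type(e).__name__)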
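If that is indeed the cause, the fix is just a renaming: keep the full arrays under names that no inner loop reuses, and use batch-local names inside the loops. Below is a minimal sketch of the per-epoch re-split with non-clashing names; x_all, y_all, x_batch and y_batch are illustrative names, not from the original code:

import tensorflow as tf
from tensorflow.keras import datasets

def preprocess(x, y):
    # scale pixels to [0, 1] and cast labels to int32
    return tf.cast(x, tf.float32) / 255., tf.cast(y, tf.int32)

# keep the full arrays under names that no loop variable will shadow
(x_all, y_all), (x_test, y_test) = datasets.cifar100.load_data()
y_all = tf.squeeze(y_all, axis=1)

for epoch in range(50):
    # re-split from the untouched full arrays every epoch
    idx = tf.random.shuffle(tf.range(x_all.shape[0]))
    x_train, y_train = tf.gather(x_all, idx[:45000]), tf.gather(y_all, idx[:45000])
    x_val, y_val = tf.gather(x_all, idx[-5000:]), tf.gather(y_all, idx[-5000:])

    train_db = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
                .map(preprocess).shuffle(10000).batch(64))
    val_db = (tf.data.Dataset.from_tensor_slices((x_val, y_val))
              .map(preprocess).batch(64))

    # batch-local names: x_all and y_all survive every epoch untouched
    for step, (x_batch, y_batch) in enumerate(train_db):
        pass  # forward pass + gradient step, exactly as in the original code
    for x_batch, y_batch in val_db:
        pass  # accuracy computation, exactly as in the original code

With this change the split is drawn from untouched data every epoch, and validation accuracy should track test accuracy instead of jumping to 100%.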