Deep Learning with Python 读书笔记 (Part 4)

发布于 September 24, 2019

Deep Learning with Python 第五章:Deep learning for computer vision

主要讲以下几部分内容

  • 理解卷积神经网络(convolutional neural networks)
  • 使用数据增强(data augmentation)来避免过拟合
  • 使用预训练的卷积神经网络模型来做特征提取(feature extraction)
  • 优化预训练的模型
  • 可视化

训练一个基于小样本的卷积神经网络

我们来看一个训练卷积神经网络模型的实际例子,来解决 Dogs vs. Cats 问题。样本中包含 25000 张猫或狗的图片(各 12500 张)。我们会从每个类别中取 1000 张作为 training set,500 张作为 validation set,500 张作为 test set

  • 首先,训练一个简单的卷积神经网络,不采用任何正则化(regularization),作为 baseline。这个结果大概能有 71% 的准确率。
  • 使用 data augmentation 来解决过拟合问题。这将把准确率提高到 82%。
  • 然后,使用预训练的卷积神经网络模型来做特征提取,可以达到 90% ~ 96% 的准确率。
  • 最后,进行模型调优(fine-tuning),能达到 97% 的准确率。
  1. 从 Kaggle 上下载 Dogs vs. Cats 数据。

     import os, shutil

     # Build a small Dogs-vs-Cats dataset out of the original Kaggle
     # download: 1000 training / 500 validation / 500 test images per class,
     # arranged as <base_dir>/<split>/<class>/ for flow_from_directory().
     original_dataset_dir = '/Data/kaggle_original_data'

     base_dir = '/Data/cats_and_dogs_small'
     # exist_ok=True makes the script safe to re-run (plain os.mkdir raises
     # FileExistsError on a second run)
     os.makedirs(base_dir, exist_ok=True)

     def _make_dir(*parts):
         """Join path parts, create the directory if needed, return its path."""
         path = os.path.join(*parts)
         os.makedirs(path, exist_ok=True)
         return path

     train_dir = _make_dir(base_dir, 'train')
     validation_dir = _make_dir(base_dir, 'validation')
     test_dir = _make_dir(base_dir, 'test')

     train_cats_dir = _make_dir(train_dir, 'cats')
     train_dogs_dir = _make_dir(train_dir, 'dogs')
     validation_cats_dir = _make_dir(validation_dir, 'cats')
     validation_dogs_dir = _make_dir(validation_dir, 'dogs')
     test_cats_dir = _make_dir(test_dir, 'cats')
     test_dogs_dir = _make_dir(test_dir, 'dogs')

     def _copy_images(prefix, indices, dst_dir):
         """Copy '<prefix>.<i>.jpg' for each i in indices into dst_dir."""
         for i in indices:
             fname = '{}.{}.jpg'.format(prefix, i)
             shutil.copyfile(os.path.join(original_dataset_dir, fname),
                             os.path.join(dst_dir, fname))

     # Indices 0-999 -> training, 1000-1499 -> validation, 1500-1999 -> test.
     _copy_images('cat', range(1000), train_cats_dir)
     _copy_images('cat', range(1000, 1500), validation_cats_dir)
     _copy_images('cat', range(1500, 2000), test_cats_dir)
     _copy_images('dog', range(1000), train_dogs_dir)
     _copy_images('dog', range(1000, 1500), validation_dogs_dir)
     _copy_images('dog', range(1500, 2000), test_dogs_dir)

     # a sanity test: each count should match the intended split sizes
     print('total training cat images:', len(os.listdir(train_cats_dir)))
     print('total training dog images:', len(os.listdir(train_dogs_dir)))
     print('total validation cat images:', len(os.listdir(validation_cats_dir)))
     print('total validation dog images:', len(os.listdir(validation_dogs_dir)))
     print('total test cat images:', len(os.listdir(test_cats_dir)))
     print('total test dog images:', len(os.listdir(test_dogs_dir)))

    
  2. 创建一个卷积模型

    from keras import layers
    from keras import models
    from keras import optimizers
    from keras.preprocessing.image import ImageDataGenerator

    # Instantiating a small convnet for dogs vs. cats classification:
    # four Conv/MaxPool stages (32-64-128-128 filters) feeding a 512-unit
    # dense layer, with a single sigmoid unit for binary classification.
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.summary()
    model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

    # Data processing: rescale pixel values from [0, 255] to [0, 1] and
    # stream 150x150 images in batches of 20 from the split directories.
    train_datagen = ImageDataGenerator(rescale=1./255)
    test_datagen = ImageDataGenerator(rescale=1./255)
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),  # fix: comma was missing here (SyntaxError)
        batch_size=20,
        class_mode='binary')
    validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

    # Fitting the model using a batch generator:
    # 100 steps x 20 images = the full 2000 training images per epoch.
    history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,
      epochs=30,
      validation_data=validation_generator,
      validation_steps=50)
    model.save('cats_and_dogs_small_1.h5')

    
  3. 使用 data augmentation 来解决过拟合

    # Same convnet as the baseline, plus a Dropout layer after flattening;
    # dropout and augmented training data together combat overfitting.
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu',
                      input_shape=(150, 150, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.RMSprop(lr=1e-4),
                  metrics=['acc'])

    # Training the convnet using data-augmentation generators: random
    # rotations, shifts, shears, zooms and horizontal flips are applied to
    # the training images only — validation data is just rescaled.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

    test_datagen = ImageDataGenerator(rescale=1./255)

    # Both generators share the same image size / batching configuration.
    flow_kwargs = dict(target_size=(150, 150),
                       batch_size=32,
                       class_mode='binary')
    train_generator = train_datagen.flow_from_directory(
        train_dir, **flow_kwargs)
    validation_generator = test_datagen.flow_from_directory(
        validation_dir, **flow_kwargs)

    history = model.fit_generator(
        train_generator,
        steps_per_epoch=100,
        epochs=100,
        validation_data=validation_generator,
        validation_steps=50)
    model.save('cats_and_dogs_small_2.h5')

    
  4. 两种方法来使用预训练模型:feature extraction 和 fine-tuning。代码略。

卷积神经网络的可视化

几种可视化的方法:

  • Visualizing intermediate convnet outputs (intermediate activations)

    intermediate_activations

  • Visualizing convnets filters

    filter_patterns

  • Visualizing heatmaps of class activation in an image

    heatmaps

总结

卷积神经网络是用于解决视觉分类问题的最好工具。

E