Simple CNN on CIFAR10 dataset

Trains a simple deep CNN on the CIFAR10 small images dataset.

It gets down to 0.65 test log loss in 25 epochs, and down to 0.55 after 50 epochs, though the model is still underfitting at that point.

If you enable data augmentation, you may want to increase the number of filters in the convolutional layers and the number of units in the dense layers.

library(keras)

# Parameters --------------------------------------------------------------

batch_size <- 32            # samples per gradient update
epochs <- 50                # passes over the full training set
data_augmentation <- FALSE  # set to TRUE to prepend random flip/rotation layers


# Data Preparation --------------------------------------------------------

# See ?dataset_cifar10 for more info
cifar10 <- dataset_cifar10()
# Feature scale RGB values in test and train inputs  
x_train <- cifar10$train$x/255
x_test <- cifar10$test$x/255
y_train <- cifar10$train$y
y_test <- cifar10$test$y
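
# A quick shape check can catch preprocessing mistakes early. This is a
# small addition, assuming the standard layout returned by dataset_cifar10():
dim(x_train)    # 50000 32 32 3 -- scaled RGB values
dim(y_train)    # 50000 1 -- integer class labels in 0..9
range(x_train)  # 0 1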


# Defining Model ----------------------------------------------------------

# Initialize sequential model
model <- keras_model_sequential()


if (data_augmentation) {
  # Prepend random horizontal flips and rotations of up to 20% of a full circle
  augmentation <- keras_model_sequential() %>% 
    layer_random_flip("horizontal") %>% 
    layer_random_rotation(0.2)
  
  model <- model %>% 
    augmentation()
}
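
Note that Keras random preprocessing layers such as layer_random_flip() and layer_random_rotation() are only active in training mode; during evaluate() and predict() they pass images through unchanged.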

model <- model %>%
  # Start with a hidden 2D convolutional layer fed 32x32 pixel images
  layer_conv_2d(
    filters = 16, kernel_size = c(3,3), padding = "same", 
    input_shape = c(32, 32, 3)
  ) %>%
  layer_activation_leaky_relu(0.1) %>% 

  # Second hidden layer
  layer_conv_2d(filters = 32, kernel_size = c(3,3)) %>%
  layer_activation_leaky_relu(0.1) %>% 

  # Use max pooling
  layer_max_pooling_2d(pool_size = c(2,2)) %>%
  layer_dropout(0.25) %>%
  
  # 2 additional hidden 2D convolutional layers
  layer_conv_2d(filters = 32, kernel_size = c(3,3), padding = "same") %>%
  layer_activation_leaky_relu(0.1) %>% 
  layer_conv_2d(filters = 64, kernel_size = c(3,3)) %>%
  layer_activation_leaky_relu(0.1) %>% 

  # Use max pooling once more
  layer_max_pooling_2d(pool_size = c(2,2)) %>%
  layer_dropout(0.25) %>%
  
  # Flatten max filtered output into a feature vector 
  # and feed it into a dense layer
  layer_flatten() %>%
  layer_dense(256) %>%
  layer_activation_leaky_relu(0.1) %>% 
  layer_dropout(0.5) %>%

  # Project onto a 10-unit output layer; no softmax here, since the
  # loss below is configured with from_logits = TRUE
  layer_dense(10)
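
# To inspect the resulting architecture and per-layer parameter counts,
# print a model summary (a small addition, not in the original script):
summary(model)
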
# Adamax with a staircase exponential learning-rate decay schedule
opt <- optimizer_adamax(learning_rate = learning_rate_schedule_exponential_decay(
  initial_learning_rate = 5e-3, 
  decay_rate = 0.96, 
  decay_steps = 1500, 
  staircase = TRUE
))
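
With staircase = TRUE, the schedule multiplies the learning rate by decay_rate once every decay_steps optimizer steps, i.e. lr(step) = 5e-3 * 0.96^floor(step / 1500). As a rough illustration (not part of the original script), with 50000 training images and a batch size of 32, one epoch is ceiling(50000 / 32) = 1563 steps:

lr_at <- function(step) 5e-3 * 0.96 ^ floor(step / 1500)
lr_at(0)          # 0.005
lr_at(1563)       # 0.0048, after about one epoch
lr_at(10 * 1563)  # ~0.0033, after about ten epochs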

model %>% compile(
  loss = loss_sparse_categorical_crossentropy(from_logits = TRUE),
  optimizer = opt,
  metrics = "accuracy"
)


# Training ----------------------------------------------------------------
history <- model %>% fit(
  x_train, y_train,
  batch_size = batch_size,
  epochs = epochs,
  validation_data = list(x_test, y_test),
  shuffle = TRUE
)
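
# fit() returns a history object with the per-epoch loss and accuracy;
# plotting it is a quick way to confirm whether the model is still underfitting:
plot(history)  # training and validation curves per epoch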

model %>% evaluate(x_test, y_test)
     loss  accuracy 
0.6197287 0.8200000
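
Since the final dense layer emits raw logits, class predictions are obtained by taking the argmax over the 10 outputs. A minimal sketch, not part of the original script:

logits <- model %>% predict(x_test)           # 10000 x 10 matrix of logits
predicted <- apply(logits, 1, which.max) - 1  # 0-based class labels
mean(predicted == as.vector(y_test))          # should match the accuracy above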