eager_styletransfer

    This is the companion code to the post “Neural style transfer with eager execution and Keras” on the TensorFlow for R blog.

    https://blogs.rstudio.com/tensorflow/posts/2018-09-09-eager-style-transfer

    library(keras)
    use_implementation("tensorflow")
    use_session_with_seed(7777, disable_gpu = FALSE, disable_parallel_cpu = FALSE)
    library(tensorflow)
    tfe_enable_eager_execution(device_policy = "silent")
    
    library(purrr)
    library(glue)
    
    img_shape <- c(128, 128, 3)
    content_path <- "isar.jpg"
    style_path <- "The_Great_Wave_off_Kanagawa.jpg"
    
    
    num_iterations <- 2000
    content_weight <- 100
    style_weight <- 0.8
    total_variation_weight <- 0.01
    
    content_image <-
      image_load(content_path, target_size = img_shape[1:2])
    content_image %>% image_to_array() %>%
      `/`(., 255) %>%
      as.raster() %>%  plot()
    
    style_image <-
      image_load(style_path, target_size = img_shape[1:2])
    style_image %>% image_to_array() %>%
      `/`(., 255) %>%
      as.raster() %>%  plot()
    
    
    load_and_process_image <- function(path) {
      img <- image_load(path, target_size = img_shape[1:2]) %>%
        image_to_array() %>%
        k_expand_dims(axis = 1) %>%
        imagenet_preprocess_input()
    }
    
    deprocess_image <- function(x) {
      x <- x[1, , ,]
      # Remove zero-center by mean pixel
      x[, , 1] <- x[, , 1] + 103.939
      x[, , 2] <- x[, , 2] + 116.779
      x[, , 3] <- x[, , 3] + 123.68
      # 'BGR'->'RGB'
      x <- x[, , c(3, 2, 1)]
      x[x > 255] <- 255
      x[x < 0] <- 0
      x[] <- as.integer(x) / 255
      x
    }
    
    content_layers <- c("block5_conv2")
    style_layers = c("block1_conv1",
                     "block2_conv1",
                     "block3_conv1",
                     "block4_conv1",
                     "block5_conv1")
    num_content_layers <- length(content_layers)
    num_style_layers <- length(style_layers)
    
    get_model <- function() {
      vgg <- application_vgg19(include_top = FALSE, weights = "imagenet")
      vgg$trainable <- FALSE
      style_outputs <-
        map(style_layers, function(layer)
          vgg$get_layer(layer)$output)
      content_outputs <-
        map(content_layers, function(layer)
          vgg$get_layer(layer)$output)
      model_outputs <- c(style_outputs, content_outputs)
      keras_model(vgg$input, model_outputs)
    }
    
    content_loss <- function(content_image, target) {
      k_sum(k_square(target - content_image))
    }
    
    gram_matrix <- function(x) {
      features <- k_batch_flatten(k_permute_dimensions(x, c(3, 1, 2)))
      gram <- k_dot(features, k_transpose(features))
      gram
    }
    
    style_loss <- function(gram_target, combination) {
      gram_comb <- gram_matrix(combination)
      k_sum(k_square(gram_target - gram_comb)) / (4 * (img_shape[3] ^ 2) * (img_shape[1] * img_shape[2]) ^
                                                    2)
    }
    
    total_variation_loss <- function(image) {
      y_ij  <- image[1:(img_shape[1] - 1L), 1:(img_shape[2] - 1L),]
      y_i1j <- image[2:(img_shape[1]), 1:(img_shape[2] - 1L),]
      y_ij1 <- image[1:(img_shape[1] - 1L), 2:(img_shape[2]),]
      a <- k_square(y_ij - y_i1j)
      b <- k_square(y_ij - y_ij1)
      k_sum(k_pow(a + b, 1.25))
    }
    
    get_feature_representations <-
      function(model, content_path, style_path) {
        # dim == (1, 128, 128, 3)
        style_image <-
          load_and_process_image(style_path) %>% k_cast("float32")
        # dim == (1, 128, 128, 3)
        content_image <-
          load_and_process_image(content_path) %>% k_cast("float32")
        # dim == (2, 128, 128, 3)
        stack_images <-
          k_concatenate(list(style_image, content_image), axis = 1)
        # length(model_outputs) == 6
        # dim(model_outputs[[1]]) = (2, 128, 128, 64)
        # dim(model_outputs[[6]]) = (2, 8, 8, 512)
        model_outputs <- model(stack_images)
        style_features <- model_outputs[1:num_style_layers] %>%
          map(function(batch)
            batch[1, , , ])
        content_features <-
          model_outputs[(num_style_layers + 1):(num_style_layers + num_content_layers)] %>%
          map(function(batch)
            batch[2, , , ])
        list(style_features, content_features)
      }
    
    compute_loss <-
      function(model,
               loss_weights,
               init_image,
               gram_style_features,
               content_features) {
        c(style_weight, content_weight) %<-% loss_weights
        model_outputs <- model(init_image)
        style_output_features <- model_outputs[1:num_style_layers]
        content_output_features <-
          model_outputs[(num_style_layers + 1):(num_style_layers + num_content_layers)]
        
        weight_per_style_layer <- 1 / num_style_layers
        style_score <- 0
        # str(style_zip, max.level = 1)
        # dim(style_zip[[5]][[1]]) == (512, 512)
        style_zip <-
          transpose(list(gram_style_features, style_output_features))
        for (l in 1:length(style_zip)) {
          # for l == 1:
          # dim(target_style) == (64, 64)
          # dim(comb_style) == (1, 128, 128, 64)
          c(target_style, comb_style) %<-% style_zip[[l]]
          style_score <-
            style_score + weight_per_style_layer * style_loss(target_style, comb_style[1, , , ])
        }
        
        weight_per_content_layer <- 1 / num_content_layers
        content_score <- 0
        content_zip <-
          transpose(list(content_features, content_output_features))
        for (l in 1:length(content_zip)) {
          # dim(comb_content) ==  (1, 8, 8, 512)
          # dim(target_content) == (8, 8, 512)
          c(target_content, comb_content) %<-% content_zip[[l]]
          content_score <-
            content_score + weight_per_content_layer * content_loss(comb_content[1, , , ], target_content)
        }
        
        variation_loss <- total_variation_loss(init_image[1, , ,])
        style_score <- style_score * style_weight
        content_score <- content_score * content_weight
        variation_score <- variation_loss * total_variation_weight
        
        loss <- style_score + content_score + variation_score
        list(loss, style_score, content_score, variation_score)
      }
    
    compute_grads <-
      function(model,
               loss_weights,
               init_image,
               gram_style_features,
               content_features) {
        with(tf$GradientTape() %as% tape, {
          scores <-
            compute_loss(model,
                         loss_weights,
                         init_image,
                         gram_style_features,
                         content_features)
        })
        total_loss <- scores[[1]]
        list(tape$gradient(total_loss, init_image), scores)
      }
    
    run_style_transfer <- function(content_path,
                                   style_path) {
      model <- get_model()
      walk(model$layers, function(layer)
        layer$trainable = FALSE)
      
      c(style_features, content_features) %<-% get_feature_representations(model, content_path, style_path)
      # dim(gram_style_features[[1]]) == (64, 64)
      # we compute this once, in advance
      gram_style_features <-
        map(style_features, function(feature)
          gram_matrix(feature))
      
      init_image <- load_and_process_image(content_path)
      init_image <-
        tf$contrib$eager$Variable(init_image, dtype = "float32")
      
      optimizer <-
        tf$train$AdamOptimizer(learning_rate = 1,
                               beta1 = 0.99,
                               epsilon = 1e-1)
      
      c(best_loss, best_image) %<-% list(Inf, NULL)
      loss_weights <- list(style_weight, content_weight)
      
      start_time <- Sys.time()
      global_start <- Sys.time()
      
      norm_means <- c(103.939, 116.779, 123.68)
      min_vals <- -norm_means
      max_vals <- 255 - norm_means
      
      for (i in seq_len(num_iterations)) {
        # dim(grads) == (1, 128, 128, 3)
        c(grads, all_losses) %<-% compute_grads(model,
                                                loss_weights,
                                                init_image,
                                                gram_style_features,
                                                content_features)
        c(loss, style_score, content_score, variation_score) %<-% all_losses
        optimizer$apply_gradients(list(tuple(grads, init_image)))
        clipped <- tf$clip_by_value(init_image, min_vals, max_vals)
        init_image$assign(clipped)
        
        end_time <- Sys.time()
        
        if (k_cast_to_floatx(loss) < best_loss) {
          best_loss <- k_cast_to_floatx(loss)
          best_image <- init_image
        }
        
        if (i %% 50 == 0) {
          glue("Iteration: {i}") %>% print()
          glue(
            "Total loss: {k_cast_to_floatx(loss)}, style loss: {k_cast_to_floatx(style_score)},
            content loss: {k_cast_to_floatx(content_score)}, total variation loss: {k_cast_to_floatx(variation_score)},
            time for 1 iteration: {(Sys.time() - start_time) %>% round(2)}"
          ) %>% print()
          
          if (i %% 100 == 0) {
            png(paste0("style_epoch_", i, ".png"))
            plot_image <- best_image$numpy()
            plot_image <- deprocess_image(plot_image)
            plot(as.raster(plot_image), main = glue("Iteration {i}"))
            dev.off()
          }
        }
      }
      
      glue("Total time: {Sys.time() - global_start} seconds") %>% print()
      list(best_image, best_loss)
    }
    
    c(best_image, best_loss) %<-% run_style_transfer(content_path, style_path)