neural_style_transfer

    Neural style transfer with Keras.

    It is preferable to run this script on a GPU, for speed.

    Example result: https://twitter.com/fchollet/status/686631033085677568

    Style transfer consists of generating an image with the same “content” as a base image, but with the “style” of a different picture (typically artistic).

    This is achieved by optimizing a loss function with three components: “style loss”, “content loss”, and “total variation loss” (combined into the weighted sum sketched after the list):

    • The total variation loss imposes local spatial continuity between the pixels of the combination image, giving it visual coherence.

    • The style loss is where the deep learning kicks in: it is defined using a deep convolutional neural network. Precisely, it consists of a sum of L2 distances between the Gram matrices of the representations of the style reference image and the combination image, extracted from different layers of a convnet (trained on ImageNet). The general idea is to capture color/texture information at different spatial scales (fairly large scales, determined by the depth of the layer considered).

    • The content loss is an L2 distance between the features of the base image (extracted from a deep layer) and the features of the combination image, keeping the generated image close enough to the original one.
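
    Concretely, the script below minimizes the weighted sum of these three terms. As a rough sketch (using the weight names from the Parameters section; n_style_layers stands in for the number of style layers used):

    loss = content_weight * content_loss
         + (style_weight / n_style_layers) * sum(style_losses)
         + total_variation_weight * total_variation_loss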

    library(keras)
    library(purrr)
    library(R6)
    
    # Parameters --------------------------------------------------------------
    
    base_image_path <- "neural-style-base-img.png"
    style_reference_image_path <- "neural-style-style.jpg"
    iterations <- 10
    
    # these are the weights of the different loss components
    total_variation_weight <- 1
    style_weight <- 1
    content_weight <- 0.025
    
    # dimensions of the generated picture.
    img <- image_load(base_image_path)
    width <- img$size[[1]]
    height <- img$size[[2]]
    img_nrows <- 400
    img_ncols <- as.integer(width * img_nrows / height)
    
    
    # Functions ---------------------------------------------------------------
    
    # util function to open, resize and format pictures into appropriate tensors
    preprocess_image <- function(path){
      img <- image_load(path, target_size = c(img_nrows, img_ncols)) %>%
        image_to_array() %>%
        array_reshape(c(1, dim(.)))
      imagenet_preprocess_input(img)
    }
    
    # util function to convert a tensor into a valid image
    # also turn BGR into RGB.
    deprocess_image <- function(x){
      x <- x[1,,,]
      # Remove zero-center by mean pixel
      x[,,1] <- x[,,1] + 103.939
      x[,,2] <- x[,,2] + 116.779
      x[,,3] <- x[,,3] + 123.68
      # BGR -> RGB
      x <- x[,,c(3,2,1)]
      # clip to interval 0, 255
      x[x > 255] <- 255
      x[x < 0] <- 0
      x[] <- as.integer(x)/255
      x
    }
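    
    # optional sanity check (assumes the base image file exists): preprocess the
    # base image and immediately deprocess it; the plot should look like the
    # original picture, resized to img_nrows x img_ncols
    check_img <- preprocess_image(base_image_path)
    plot(as.raster(deprocess_image(check_img)))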
    
    
    # Defining the model ------------------------------------------------------
    
    # get tensor representations of our images
    base_image <- k_variable(preprocess_image(base_image_path))
    style_reference_image <- k_variable(preprocess_image(style_reference_image_path))
    
    # this will contain our generated image
    combination_image <- k_placeholder(c(1, img_nrows, img_ncols, 3))
    
    # combine the 3 images into a single Keras tensor
    input_tensor <- k_concatenate(list(base_image, style_reference_image, 
                                       combination_image), axis = 1)
    
    # build the VGG16 network with our 3 images as input
    # the model will be loaded with pre-trained ImageNet weights
    model <- application_vgg16(input_tensor = input_tensor, weights = "imagenet", 
                               include_top = FALSE)
    
    print("Model loaded.")
    
    nms <- map_chr(model$layers, ~.x$name)
    output_dict <- map(model$layers, ~.x$output) %>% set_names(nms)
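    
    # optional: inspect which VGG16 layers are available as feature extractors
    # (e.g. "block1_conv1", ..., "block5_conv3"); these names are the keys used below
    print(nms)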
    
    # compute the neural style loss
    # first we need to define 4 util functions
    
    # the gram matrix of an image tensor (feature-wise outer product)
    
    gram_matrix <- function(x){
      
      features <- x %>%
        k_permute_dimensions(pattern = c(3, 1, 2)) %>%
        k_batch_flatten()
      
      k_dot(features, k_transpose(features))
    }
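    
    # toy illustration (a sketch, not part of the style transfer itself): a
    # 4 x 4 feature map with 3 channels yields a 3 x 3 Gram matrix of
    # channel-wise inner products; k_eval() just evaluates the resulting tensor
    toy <- k_constant(array(runif(4 * 4 * 3), dim = c(4, 4, 3)))
    print(k_eval(gram_matrix(toy)))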
    
    # the "style loss" is designed to maintain
    # the style of the reference image in the generated image.
    # It is based on the gram matrices (which capture style) of
    # feature maps from the style reference image
    # and from the generated image
    
    style_loss <- function(style, combination){
      S <- gram_matrix(style)
      C <- gram_matrix(combination)
      
      channels <- 3
      size <- img_nrows*img_ncols
      
      k_sum(k_square(S - C)) / (4 * channels^2  * size^2)
    }
    
    # an auxiliary loss function
    # designed to maintain the "content" of the
    # base image in the generated image
    
    content_loss <- function(base, combination){
      k_sum(k_square(combination - base))
    }
    
    # the 3rd loss function, total variation loss,
    # designed to keep the generated image locally coherent
    
    total_variation_loss <- function(x){
      y_ij  <- x[,1:(img_nrows - 1L), 1:(img_ncols - 1L),]
      y_i1j <- x[,2:(img_nrows), 1:(img_ncols - 1L),]
      y_ij1 <- x[,1:(img_nrows - 1L), 2:(img_ncols),]
      
      a <- k_square(y_ij - y_i1j)
      b <- k_square(y_ij - y_ij1)
      k_sum(k_pow(a + b, 1.25))
    }
    
    # combine these loss functions into a single scalar
    loss <- k_variable(0.0)
    layer_features <- output_dict$block4_conv2
    base_image_features <- layer_features[1,,,]
    combination_features <- layer_features[3,,,]
    
    loss <- loss + content_weight*content_loss(base_image_features, 
                                               combination_features)
    
    feature_layers <- c('block1_conv1', 'block2_conv1',
                        'block3_conv1', 'block4_conv1',
                        'block5_conv1')
    
    for(layer_name in feature_layers){
      layer_features <- output_dict[[layer_name]]
      style_reference_features <- layer_features[2,,,]
      combination_features <- layer_features[3,,,]
      sl <- style_loss(style_reference_features, combination_features)
      loss <- loss + ((style_weight / length(feature_layers)) * sl)
    }
    
    loss <- loss + (total_variation_weight * total_variation_loss(combination_image))
    
    # get the gradients of the generated image wrt the loss
    grads <- k_gradients(loss, combination_image)[[1]]
    
    f_outputs <- k_function(list(combination_image), list(loss, grads))
    
    eval_loss_and_grads <- function(image){
      image <- array_reshape(image, c(1, img_nrows, img_ncols, 3))
      outs <- f_outputs(list(image))
      list(
        loss_value = outs[[1]],
        grad_values = array_reshape(outs[[2]], dim = length(outs[[2]]))
      )
    }
    
    # Loss and gradients evaluator.
    # 
    # This Evaluator class makes it possible
    # to compute loss and gradients in one pass
    # while retrieving them via two separate functions,
    # "loss" and "grads". This is done because scipy.optimize
    # requires separate functions for loss and gradients,
    # but computing them separately would be inefficient.
    Evaluator <- R6Class(
      "Evaluator",
      public = list(
        
        loss_value = NULL,
        grad_values = NULL,
        
        initialize = function() {
          self$loss_value <- NULL
          self$grad_values <- NULL
        },
        
        loss = function(x){
          loss_and_grad <- eval_loss_and_grads(x)
          self$loss_value <- loss_and_grad$loss_value
          self$grad_values <- loss_and_grad$grad_values
          self$loss_value
        },
        
        grads = function(x){
          grad_values <- self$grad_values
          self$loss_value <- NULL
          self$grad_values <- NULL
          grad_values
        }
        
      )
    )
    
    evaluator <- Evaluator$new()
    
    # initialize the generated image with random noise
    dms <- c(1, img_nrows, img_ncols, 3)
    x <- array(data = runif(prod(dms), min = 0, max = 255) - 128, dim = dms)
    
    # Run optimization (L-BFGS-B, via optim()) over the pixels of the generated image
    # so as to minimize the neural style loss
    for(i in 1:iterations){
    
      # Run L-BFGS
      opt <- optim(
        array_reshape(x, dim = length(x)), fn = evaluator$loss, gr = evaluator$grads, 
        method = "L-BFGS-B",
        control = list(maxit = 15)
      )
      
      # Print loss value
      print(opt$value)
      
      # decode the image
      image <- x <- opt$par
      image <- array_reshape(image, dms)
      
      # plot
      im <- deprocess_image(image)
      plot(as.raster(im))
    }
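    
    # optionally, write the final combination image to disk
    # (the file name here is just an example)
    png("neural-style-result.png", width = img_ncols, height = img_nrows)
    plot(as.raster(im))
    dev.off()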