Trains a memory network on the bAbI dataset for reading comprehension.
References:
Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, Alexander M. Rush, “Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks”, http://arxiv.org/abs/1502.05698
Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus, “End-To-End Memory Networks”, http://arxiv.org/abs/1503.08895
Reaches 98.6% accuracy on task ‘single_supporting_fact_10k’ after 120 epochs. Time per epoch: 3s on CPU (core i7).
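The data-loading code below relies on a few names defined earlier in the full example: parse_stories() and vectorize_stories() (helper functions not reproduced here), plus the download path, the challenge file template, and max_length. A minimal sketch of that setup follows; the download URL and the file-path template inside the archive are the ones the Keras bAbI examples have historically used and are included here as assumptions, not as part of this section.

library(keras)
library(readr)
library(stringr)
library(purrr)
library(dplyr)

# Assumed setup for the snippets that follow (not shown in this section) ----
max_length <- 99999  # keep all stories; lower this to filter out long ones

# Download the bAbI tasks archive; get_file() caches it across runs
path <- get_file(
  fname = "babi-tasks-v1-2.tar.gz",
  origin = "https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz"
)

# sprintf() template for the single_supporting_fact_10k task files;
# the "%s" slots are filled with the extraction path and "train"/"test"
challenge <- "%stasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_%s.txt"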
untar(path, exdir = str_replace(path, fixed(".tar.gz"), "/"))
path <- str_replace(path, fixed(".tar.gz"), "/")

# Reading training and test data
train <- read_lines(sprintf(challenge, path, "train")) %>%
  parse_stories() %>%
  filter(map_int(story, ~length(.x)) <= max_length)
test <- read_lines(sprintf(challenge, path, "test")) %>%
  parse_stories() %>%
  filter(map_int(story, ~length(.x)) <= max_length)
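As a quick sanity check on the parsing step, it can help to peek at the parsed tibbles. This is only a sketch: the question, answer, and story columns are assumed to exist because the vocabulary-extraction code below uses them.

# Peek at the parsed training data (illustrative only)
glimpse(train)
train %>% select(question, answer) %>% head()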
# Extract the vocabulary
all_data <- bind_rows(train, test)
vocab <- c(unlist(all_data$question), all_data$answer, unlist(all_data$story)) %>%
  unique() %>%
  sort()

# Reserve 0 for masking via pad_sequences
vocab_size <- length(vocab) + 1
story_maxlen <- map_int(all_data$story, ~length(.x)) %>% max()
query_maxlen <- map_int(all_data$question, ~length(.x)) %>% max()

# Vectorized versions of training and test sets
train_vec <- vectorize_stories(train, vocab, story_maxlen, query_maxlen)
test_vec <- vectorize_stories(test, vocab, story_maxlen, query_maxlen)

# Defining the model ------------------------------------------------------

# Placeholders
sequence <- layer_input(shape = c(story_maxlen))
question <- layer_input(shape = c(query_maxlen))

# Encoders
# Embed the input sequence into a sequence of vectors
sequence_encoder_m <- keras_model_sequential()
sequence_encoder_m %>%
  layer_embedding(input_dim = vocab_size, output_dim = 64) %>%
  layer_dropout(rate = 0.3)
# output: (samples, story_maxlen, embedding_dim)

# Embed the input into a sequence of vectors of size query_maxlen
sequence_encoder_c <- keras_model_sequential()
sequence_encoder_c %>%
  layer_embedding(input_dim = vocab_size, output_dim = query_maxlen) %>%
  layer_dropout(rate = 0.3)
# output: (samples, story_maxlen, query_maxlen)

# Embed the question into a sequence of vectors
question_encoder <- keras_model_sequential()
question_encoder %>%
  layer_embedding(input_dim = vocab_size, output_dim = 64,
                  input_length = query_maxlen) %>%
  layer_dropout(rate = 0.3)
# output: (samples, query_maxlen, embedding_dim)

# Encode the input sequence and questions (which are indices)
# to sequences of dense vectors
sequence_encoded_m <- sequence_encoder_m(sequence)
sequence_encoded_c <- sequence_encoder_c(sequence)
question_encoded <- question_encoder(question)

# Compute a 'match' between the first input vector sequence
# and the question vector sequence
# shape: `(samples, story_maxlen, query_maxlen)`
dot <- layer_dot(axes = c(2, 2))
match <- list(sequence_encoded_m, question_encoded) %>%
  dot() %>%
  layer_activation("softmax")

# Add the match matrix with the second input vector sequence
response <- list(match, sequence_encoded_c) %>%
  layer_add() %>%
  layer_permute(c(2, 1))

# Concatenate the response with the question vector sequence
answer <- list(response, question_encoded) %>%
  layer_concatenate() %>%
  # The original paper uses a matrix multiplication for this reduction step.
  # We choose to use an RNN instead.
  layer_lstm(32) %>%
  # One regularization layer -- more would probably be needed.
  layer_dropout(rate = 0.3) %>%
  layer_dense(vocab_size) %>%
  # We output a probability distribution over the vocabulary
  layer_activation("softmax")

# Build the final model
model <- keras_model(inputs = list(sequence, question), answer)
model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = "accuracy"
)

# Training ----------------------------------------------------------------

model %>% fit(
  x = list(train_vec$stories, train_vec$questions),
  y = train_vec$answers,
  batch_size = 32,
  epochs = 120,
  validation_data = list(list(test_vec$stories, test_vec$questions), test_vec$answers)
)
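After training, a held-out evaluation is the natural next step. The sketch below reuses the test_vec tensors already passed as validation_data; note that mapping an argmax index back to a vocabulary word depends on how vectorize_stories() encodes answers, which is not shown in this section, so the decoding part is purely illustrative.

# Evaluation ---------------------------------------------------------------

# Loss and accuracy on the test set (same tensors as validation_data above)
model %>% evaluate(
  x = list(test_vec$stories, test_vec$questions),
  y = test_vec$answers
)

# Each row of `preds` is a probability distribution over the vocabulary;
# which.max() picks the most likely answer index per question. Decoding that
# index to a word depends on vectorize_stories()'s indexing scheme.
preds <- model %>% predict(list(test_vec$stories, test_vec$questions))
predicted_idx <- apply(preds, 1, which.max)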