from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals


def apply_regular_attention(
model,
encoder_output_dim,
encoder_outputs_transposed,
weighted_encoder_outputs,
decoder_hidden_state_t,
decoder_hidden_state_dim,
    # TODO: we need to provide batch_size for some reshape methods;
    # ideally we should not need to specify it at all
batch_size,
scope,
):
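    """Adds a regular (additive, Bahdanau-style) attention block to model.

    For each encoder position t this computes the score
        score[t] = v^T tanh(W_e * encoder_output[t] + W_d * decoder_hidden_state),
    normalizes the scores over t with a softmax, and returns the resulting
    attention-weighted sum of the encoder outputs, of shape
    [batch_size, encoder_output_dim].

    weighted_encoder_outputs is expected to already hold the encoder-side
    projection W_e * encoder_outputs; it does not depend on the decoder
    step, so it can be computed once per sequence. The decoder-side
    projection W_d * decoder_hidden_state is applied below via FC.
    """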
    def s(name):
        # We have to scope blob names manually because of our
        # internal/external blob relationships.
        return "{}/{}".format(str(scope), str(name))

# [1, batch_size, encoder_output_dim]
weighted_decoder_hidden_state = model.FC(
decoder_hidden_state_t,
s('weighted_decoder_hidden_state'),
dim_in=decoder_hidden_state_dim,
dim_out=encoder_output_dim,
axis=2,
)
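    # Drop the singleton time dimension so that the decoder term can be
    # broadcast against every encoder position below.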
# [batch_size, encoder_output_dim]
weighted_decoder_hidden_state = model.net.Squeeze(
weighted_decoder_hidden_state,
weighted_decoder_hidden_state,
dims=[0],
)
    # TODO: remove this excessive Copy once RecurrentNetwork supports the
    # Sum op at the beginning of step_net
weighted_encoder_outputs_copy = model.net.Copy(
weighted_encoder_outputs,
s('weighted_encoder_outputs_copy'),
)
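    # Broadcast-add the projected decoder state to the projected encoder
    # output at every encoder position.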
# [encoder_length, batch_size, encoder_output_dim]
decoder_hidden_encoder_outputs_sum = model.net.Add(
[weighted_encoder_outputs_copy, weighted_decoder_hidden_state],
s('decoder_hidden_encoder_outputs_sum'),
broadcast=1,
use_grad_hack=1,
)
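    # Apply the additive-attention nonlinearity:
    # tanh(W_e * encoder_output[t] + W_d * decoder_hidden_state).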
# [encoder_length, batch_size, encoder_output_dim]
decoder_hidden_encoder_outputs_sum = model.net.Tanh(
decoder_hidden_encoder_outputs_sum,
decoder_hidden_encoder_outputs_sum,
)
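    # Flatten to 2-D so that a single MatMul against attention_v can score
    # every (encoder position, example) pair at once.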
# [encoder_length * batch_size, encoder_output_dim]
decoder_hidden_encoder_outputs_sum_tanh_2d, _ = model.net.Reshape(
decoder_hidden_encoder_outputs_sum,
[
s('decoder_hidden_encoder_outputs_sum_tanh_2d'),
s('decoder_hidden_encoder_outputs_sum_tanh_t_old_shape'),
],
shape=[-1, encoder_output_dim],
)
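    # attention_v is the learned scoring vector v of the additive score
    # v^T tanh(...); it is shared across all encoder positions.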
attention_v = model.param_init_net.XavierFill(
[],
s('attention_v'),
shape=[encoder_output_dim, 1],
)
model.add_param(attention_v)
# [encoder_length * batch_size, 1]
attention_logits = model.net.MatMul(
[decoder_hidden_encoder_outputs_sum_tanh_2d, attention_v],
s('attention_logits'),
)
# [encoder_length, batch_size]
attention_logits, _ = model.net.Reshape(
attention_logits,
[
attention_logits,
s('attention_logits_old_shape'),
],
shape=[-1, batch_size],
)
# [batch_size, encoder_length]
attention_logits_transposed = model.net.Transpose(
attention_logits,
s('attention_logits_transposed'),
axes=[1, 0],
)
    # TODO: we could try to force some attention weights to zero,
    # based on encoder_lengths.
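    # Softmax normalizes along the last axis, so after the transpose above
    # each example's weights sum to one over the encoder positions.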
# [batch_size, encoder_length]
attention_weights = model.Softmax(
attention_logits_transposed,
s('attention_weights'),
)
# TODO: make this operation in-place
# [batch_size, encoder_length, 1]
attention_weights_3d = model.net.ExpandDims(
attention_weights,
s('attention_weights_3d'),
dims=[2],
)
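    # Batched matrix-vector product; for each example b the context is
    # sum_t attention_weights[b, t] * encoder_outputs_transposed[b, :, t].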
# [batch_size, encoder_output_dim, 1]
attention_weighted_encoder_context = model.net.BatchMatMul(
[encoder_outputs_transposed, attention_weights_3d],
s('attention_weighted_encoder_context'),
)
    # TODO: somehow the in-place Squeeze op cannot be used here
# [batch_size, encoder_output_dim]
attention_weighted_encoder_context, _ = model.net.Reshape(
attention_weighted_encoder_context,
[
attention_weighted_encoder_context,
            s('attention_weighted_encoder_context_old_shape'),
],
shape=[-1, encoder_output_dim],
)
return attention_weighted_encoder_context
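

# The block below is a minimal, hypothetical usage sketch, not part of the
# original module. It assumes the classic CNNModelHelper interface (which
# supplies the FC/Softmax/add_param methods used above) and feeds random
# blobs of the shapes documented in the comments; all names and sizes here
# are illustrative only.
if __name__ == '__main__':
    import numpy as np
    from caffe2.python import cnn, workspace

    encoder_length, batch_size = 5, 2
    encoder_output_dim, decoder_hidden_state_dim = 8, 4

    model = cnn.CNNModelHelper(name='attention_example')
    context = apply_regular_attention(
        model,
        encoder_output_dim=encoder_output_dim,
        # [batch_size, encoder_output_dim, encoder_length]
        encoder_outputs_transposed='encoder_outputs_transposed',
        # [encoder_length, batch_size, encoder_output_dim],
        # i.e. the precomputed W_e * encoder_outputs
        weighted_encoder_outputs='weighted_encoder_outputs',
        # [1, batch_size, decoder_hidden_state_dim]
        decoder_hidden_state_t='decoder_hidden_state_t',
        decoder_hidden_state_dim=decoder_hidden_state_dim,
        batch_size=batch_size,
        scope='attention_example',
    )

    workspace.FeedBlob('encoder_outputs_transposed', np.random.randn(
        batch_size, encoder_output_dim, encoder_length).astype(np.float32))
    workspace.FeedBlob('weighted_encoder_outputs', np.random.randn(
        encoder_length, batch_size, encoder_output_dim).astype(np.float32))
    workspace.FeedBlob('decoder_hidden_state_t', np.random.randn(
        1, batch_size, decoder_hidden_state_dim).astype(np.float32))

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
    # Expected: (batch_size, encoder_output_dim), i.e. (2, 8).
    print(workspace.FetchBlob(str(context)).shape)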