caffe2/python/crf.py - platform/external/pytorch - Git at Google

 # Copyright (c) 2016-present, Facebook, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ##############################################################################

 ## @package crf
 # Module caffe2.python.crf
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 from caffe2.python import core, recurrent, model_helper, brew
 import numpy as np

 '''
 Due to a limitation in RecurrentNetworkOp, this layer only supports batch_size=1
 In order to support batch_size > 1, we will have to implement the CRFUnit
 and its gradient in C++ and handle the different batches there.
 '''


 class CRFWithLoss(object):
     def __init__(self, model, num_classes, transitions_blob=None):
         self.model = model
         self.num_classes = num_classes
         self.num_classes_padded = num_classes + 2  # After adding BOS and EOS
         if not transitions_blob:
             transitions_blob = self.model.param_init_net.UniformFill(
                 [],
                 [core.ScopedBlobReference('crf_transitions')],
                 shape=[self.num_classes_padded, self.num_classes_padded],
                 min=-1.0,
                 max=1.0
             )
         self.transitions = transitions_blob
         self.model.params.append(self.transitions)

     def crf_loss(self, predictions, labels, seq_lengths=None):
         """Add the CRF loss operators to the model and return the loss blob.

         The returned blob is the score of all paths (produced by the
         recurrent net) minus the total score of the labeled path, the latter
         being the sum of its unary and binary (transition) scores.

         predictions: blob with the class scores of every step
         labels: blob with the class index of every step on the labeled path
         seq_lengths: only forwarded to _path_binary_scores
         """
         net = self.model.net
         # The transitions matrix is a shared parameter that can be updated
         # in between the operators reading it when updates run in parallel,
         # so every reader below works from a snapshot taken up front.
         snapshot = net.Copy(
             self.transitions, core.ScopedBlobReference('transitions_snapshot')
         )
         # Unary score of the labeled path, taken from the logits
         unary_score = self._gather_entries_sum(
             predictions, labels, self.num_classes
         )
         # Add the BOS and EOS entries to the predictions and to the labels
         padded_predictions = self._pad_predictions(predictions)
         padded_labels = self._pad_labels(labels)
         # Binary score of the labeled path, taken from the transitions matrix
         binary_score = self._path_binary_scores(
             padded_labels, snapshot, seq_lengths
         )
         labeled_path_score = net.Add(
             [binary_score, unary_score],
             core.ScopedBlobReference('path_total')
         )
         # Score of all paths: row 0 of the padded predictions seeds the
         # recurrent net, the remaining rows are its input sequence.
         first_row_index = self.model.param_init_net.ConstantFill(
             [], shape=[1], value=0
         )
         rnn_initial = net.Gather(
             [padded_predictions, first_row_index],
             core.ScopedBlobReference('rnn_initial'),
             dense_gradient=True
         )
         remaining_rows, _ = net.RemovePadding(
             [padded_predictions],
             padding_width=1,
             end_padding_width=0,
             outputs=2,
         )
         rnn_input = net.ExpandDims(
             [remaining_rows],
             core.ScopedBlobReference('rnn_input_data'),
             dims=[1]
         )
         # Due to a bug in RecurrentNetworkGradientOp, the transitions blob
         # has to be copied before it is handed to the recurrent network
         rnn_transitions = net.Copy(
             snapshot, core.ScopedBlobReference('transitions_copy')
         )
         all_paths_score = self._crf_forward(
             rnn_input, rnn_initial, rnn_transitions
         )
         return net.Sub(
             [all_paths_score, labeled_path_score],
             core.ScopedBlobReference('crf_loss')
         )

     def _pad_predictions(self, predictions):
         """Return ``predictions`` extended with BOS and EOS entries.

         Two columns filled with a very low score are appended to every row,
         then a BOS row is put in front and an EOS row at the end. The BOS
         row scores 0 only in column num_classes, the EOS row only in column
         num_classes + 1; all their other entries hold the low score.
         """
         low_score = -1000.0  # An arbitrary very low number
         net = self.model.net
         init_net = self.model.param_init_net

         bos_row = np.full(
             (1, self.num_classes + 2), low_score, dtype=np.float32
         )
         bos_row[0, self.num_classes] = 0
         eos_row = np.full(
             (1, self.num_classes + 2), low_score, dtype=np.float32
         )
         eos_row[0, self.num_classes + 1] = 0

         bos_blob = init_net.GivenTensorFill(
             [], "b_scores", shape=[1, self.num_classes_padded], values=bos_row
         )
         eos_blob = init_net.GivenTensorFill(
             [], "e_scores", shape=[1, self.num_classes_padded], values=eos_row
         )

         # Read the first dimension of the predictions at run time
         first_dim_index = net.ConstantFill(
             [], shape=[1, ], value=0
         )
         shape_blob = net.Shape([predictions])
         num_rows = net.Gather([shape_blob, first_dim_index])
         num_rows = net.Cast(num_rows, to='int32')
         # One entry per row, all set to the low score, used as extra column
         row_range = net.LengthsRangeFill(num_rows)
         low_column = net.ConstantFill([row_range], value=low_score)
         low_column = net.ExpandDims(low_column, dims=[1])
         widened, _ = net.Concat(
             [predictions, low_column, low_column],
             outputs=2,
             axis=1
         )
         with_markers, _ = net.Concat(
             [bos_blob, widened, eos_blob],
             outputs=2,
             axis=0
         )
         return with_markers

     def _pad_labels(self, labels):
         """Return ``labels`` cast to int64 with BOS in front and EOS at the end.

         BOS is encoded as num_classes and EOS as num_classes + 1.
         """
         init_net = self.model.param_init_net
         net = self.model.net
         bos_blob = init_net.ConstantFill(
             [], shape=[1], value=self.num_classes
         )
         eos_blob = init_net.ConstantFill(
             [], shape=[1], value=self.num_classes + 1
         )
         labels_int64 = net.Cast([labels], to='int64')
         with_markers, _ = net.Concat(
             [bos_blob, labels_int64, eos_blob],
             axis=0,
             outputs=2
         )
         return with_markers

     def _path_binary_scores(self, labels, transitions, seq_lengths=None):
         """Return a blob with the summed transition scores along ``labels``.

         labels: padded labels blob; two RemovePadding outputs derived from it
             serve as row and column ids into ``transitions``
         transitions: transitions matrix blob
         seq_lengths: accepted but not used here
         """
         net = self.model.net
         # Column ids: labels with one entry of padding removed at the start
         col_ids, _ = net.RemovePadding(
             [labels],
             outputs=2,
             padding_width=1,
             end_padding_width=0
         )
         # Row ids: labels with one entry of padding removed at the end
         row_ids, _ = net.RemovePadding(
             [labels],
             outputs=2,
             padding_width=0,
             end_padding_width=1
         )
         # There is no multi-dimensional gather, so the matrix is flattened
         # to a 1-d vector and looked up with row_id * num_columns + col_id
         row_width = net.ConstantFill(
             [row_ids],
             value=self.num_classes_padded,
         )
         flat_ids = net.Mul([row_ids, row_width])
         flat_ids = net.Add([flat_ids, col_ids])
         flat_transitions = net.FlattenToVec([transitions])
         picked = net.Gather(
             [flat_transitions, flat_ids],
             dense_gradient=True
         )
         # NOTE(review): invoked on the model rather than on model.net, unlike
         # the other helpers - presumably the model forwards it; confirm.
         return self.model.ReduceFrontSum(picked)

     def _gather_entries_sum(self, in_data, indices, index_size):
         """Return a blob with the sum of the ``in_data`` entries at ``indices``.

         ``indices`` is cast to int64 and one-hot encoded with ``index_size``
         classes; the flattened encoding is dot-multiplied with the flattened
         ``in_data`` and the product is reduced with ReduceFrontSum.
         """
         net = self.model.net
         indices_int64 = net.Cast([indices], to='int64')
         size_blob = self.model.param_init_net.ConstantFill(
             [],
             shape=[1],
             value=index_size,
         )
         one_hot = net.OneHot([indices_int64, size_blob])
         flat_mask = net.FlattenToVec(one_hot)
         flat_data = net.FlattenToVec(in_data)
         picked_total = net.DotProduct([flat_mask, flat_data])
         return net.ReduceFrontSum([picked_total])

     def _crf_forward(
         self,
         input_blob,
         initial_state,
         transitions_copy,
         seq_lengths=None
     ):
         """Return a blob with the accumulated score of all paths.

         The last output of the recurrent CRF net is reshaped to a vector of
         num_classes_padded entries and reduced with
         SortedSegmentRangeLogSumExp, every entry carrying segment id 0. The
         result is reshaped with shape=().

         seq_lengths: accepted but not used
         """
         net = self.model.net
         # Build the RNN net and take its last timestep output
         last_cell = self.build_crf_net(
             input_blob, initial_state, transitions_copy
         )
         last_cell, _ = net.Reshape(
             [last_cell],
             outputs=2,
             shape=(self.num_classes_padded,)
         )
         segment_ids = self.model.param_init_net.ConstantFill(
             [],
             value=0,
             shape=[self.num_classes_padded],
             dtype=core.DataType.INT32,
         )

         # Accumulated total score of all the paths
         total_score = net.SortedSegmentRangeLogSumExp(
             [last_cell, segment_ids]
         )
         total_score, _ = net.Reshape(
             total_score,
             outputs=2,
             shape=()
         )
         return total_score

     def build_crf_net(self, input_blob, initial_state, transitions):
         """Add the crf_net recurrent operator to the model.

         input_blob: the input sequence in a format T x N x D
             where T is sequence size, N - batch size and D - input dimension
             ##Only supports batch-size 1##
         initial_state: blob used as the initial input of the recurrent cell
         transitions: transitions blob; declared as an external input of the
             step net and added to the scores in every step

         Returns the second blob returned by recurrent.recurrent_net (the
         original code calls it out_last).
         """
         scope = 'crf_net'

         def scoped(name):
             # We have to manually scope due to our internal/external blob
             # relationships.
             return "{}/{}".format(str(scope), str(name))

         step_model = model_helper.ModelHelper(name='crf_step',
                                               param_model=self.model)
         step_net = step_model.net
         input_t, cell_t_prev, _ = (
             step_net.AddExternalInputs(
                 core.ScopedBlobReference('input_t'),
                 core.ScopedBlobReference('cell_t_prev'),
                 transitions
             )
         )
         segment_ids = step_model.param_init_net.ConstantFill(
             [],
             [scoped('zero_segment_id')],
             value=0,
             shape=[self.num_classes_padded],
             dtype=core.DataType.INT32,
         )

         # A hack to bypass model cloning for test
         step_model.param_init_net.AddExternalOutput(segment_ids)

         # ---- the CRF step ----
         # Tile the previous cell and the current input so they can be added
         prev_transposed = brew.transpose(
             step_model,
             cell_t_prev,
             [scoped('prev_transpose')],
             axes=(0, 2, 1),
         )
         prev_tiled = step_net.Tile(
             prev_transposed,
             [scoped('prev_tiled')],
             tiles=self.num_classes_padded,
             axis=2,
         )
         input_tiled = step_net.Tile(
             input_t,
             [scoped('input_t_tiled')],
             tiles=self.num_classes_padded,
             axis=1,
         )
         prev_plus_input = step_net.Add(
             [prev_tiled, input_tiled],
             [scoped('input_with_prev')]
         )
         with_transitions = step_net.Add(
             [prev_plus_input, transitions],
             [scoped('prev_with_transitions')],
             broadcast=1,
             use_grad_hack=1,
         )
         with_transitions_2d, _ = step_net.Reshape(
             with_transitions,
             [
                 scoped('all_with_transitions_reshaped'),
                 scoped('all_with_transitions_orig'),
             ],
             shape=(self.num_classes_padded, self.num_classes_padded)
         )
         cell_t = step_net.SortedSegmentRangeLogSumExp(
             [with_transitions_2d, segment_ids],
             [scoped('cell_t')],
         )
         step_net.AddExternalOutputs(cell_t)

         # ---- recurrent network ----
         _, last_output = recurrent.recurrent_net(
             net=self.model.net,
             cell_net=step_net,
             inputs=[(input_t, input_blob)],
             initial_cell_inputs=[
                 (cell_t_prev, initial_state),
             ],
             links={
                 cell_t_prev: cell_t,
             },
             scope=scope,
             outputs_with_grads=(1,)
         )
         return last_output

     def update_predictions(self, classes):

         def crf_update_predictions_op(inputs, outputs):
             # This operator will compute the best path of classes by performing
             # Viterbi decoding and then updates the predictions to make the tag
             # On the best path has the highest score among the others
             predictions = inputs[0].data
             transitions = inputs[1].data
             predictions = inputs[0].data
             predictions_shape = inputs[0].shape
             outputs[0].reshape(predictions_shape)

             trellis = np.zeros(predictions_shape)
             backpointers = np.zeros(predictions_shape, dtype=np.int32)
             trellis[0] = predictions[0]

             for t in range(1, predictions_shape[0]):
                 v = np.expand_dims(trellis[t - 1], 1) + transitions
                 trellis[t] = predictions[t] + np.max(v, 0)
                 backpointers[t] = np.argmax(v, 0)

             viterbi = [np.argmax(trellis[-1])]
             for bp in reversed(backpointers[1:]):
                 viterbi.append(bp[viterbi[-1]])
             viterbi.reverse()

             new_predictions = np.zeros(predictions_shape)
             old_bests = []
             for i, w_predictions in enumerate(predictions):
                 # Get the current tag with the maximum score
                 new_predictions[i] = predictions[i]
                 old_best = np.argmax(w_predictions)
                 old_bests.append(old_best)
                 # Swap the scores of the current best tag and the tag on the
                 # Viterbi path
                 w_predictions[viterbi[i]], w_predictions[old_best] = \
                     w_predictions[old_best], w_predictions[viterbi[i]]
                 new_predictions[i] = w_predictions
             # Remove the BOS and EOS entries from the predictions matrix
             orig_predictions = new_predictions[1:-1, 0:-2]
             outputs[0].reshape(orig_predictions.shape)
             outputs[0].data[...] = orig_predictions
         padded_classes = self._pad_predictions(classes)
         new_classes = self.model.net.Python(crf_update_predictions_op)(
             [padded_classes, self.transitions],
             core.ScopedBlobReference('post_crf_classes')
         )
         return new_classes
	# Copyright (c) 2016-present, Facebook, Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	##############################################################################

	## @package crf
	# Module caffe2.python.crf
	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function
	from __future__ import unicode_literals
	from caffe2.python import core, recurrent, model_helper, brew
	import numpy as np

	'''
	Due to a limitation in RecurrentNetworkOp, this layer only supports batch_size=1
	In order to support batch_size > 1, we will have to implement the CRFUnit
	and its gradient in C++ and handle the different batches there.
	'''


	class CRFWithLoss(object):
	    """CRF layer with loss, assembled from Caffe2 operators.

	    Two extra classes, BOS and EOS, are added to the num_classes real ones;
	    they use the indices num_classes and num_classes + 1.
	    """

	    def __init__(self, model, num_classes, transitions_blob=None):
	        """Attach a CRF layer to ``model``.

	        model: helper whose ``param_init_net`` and ``params`` are used here
	        num_classes: number of real classes, before BOS and EOS are added
	        transitions_blob: optional existing transitions blob; when it is
	            falsy a new blob named 'crf_transitions' is created with
	            UniformFill in [-1.0, 1.0]
	        """
	        self.model = model
	        self.num_classes = num_classes
	        # BOS and EOS take two extra class slots
	        self.num_classes_padded = num_classes + 2
	        if transitions_blob:
	            self.transitions = transitions_blob
	        else:
	            padded = self.num_classes_padded
	            self.transitions = model.param_init_net.UniformFill(
	                [],
	                [core.ScopedBlobReference('crf_transitions')],
	                shape=[padded, padded],
	                min=-1.0,
	                max=1.0
	            )
	        # The transitions blob is registered as a model parameter either way
	        model.params.append(self.transitions)

	    def crf_loss(self, predictions, labels, seq_lengths=None):
	        """Add the CRF loss operators to the model and return the loss blob.

	        The returned blob is the score of all paths (produced by the
	        recurrent net) minus the total score of the labeled path, the latter
	        being the sum of its unary and binary (transition) scores.

	        predictions: blob with the class scores of every step
	        labels: blob with the class index of every step on the labeled path
	        seq_lengths: only forwarded to _path_binary_scores
	        """
	        net = self.model.net
	        # The transitions matrix is a shared parameter that can be updated
	        # in between the operators reading it when updates run in parallel,
	        # so every reader below works from a snapshot taken up front.
	        snapshot = net.Copy(
	            self.transitions, core.ScopedBlobReference('transitions_snapshot')
	        )
	        # Unary score of the labeled path, taken from the logits
	        unary_score = self._gather_entries_sum(
	            predictions, labels, self.num_classes
	        )
	        # Add the BOS and EOS entries to the predictions and to the labels
	        padded_predictions = self._pad_predictions(predictions)
	        padded_labels = self._pad_labels(labels)
	        # Binary score of the labeled path, taken from the transitions matrix
	        binary_score = self._path_binary_scores(
	            padded_labels, snapshot, seq_lengths
	        )
	        labeled_path_score = net.Add(
	            [binary_score, unary_score],
	            core.ScopedBlobReference('path_total')
	        )
	        # Score of all paths: row 0 of the padded predictions seeds the
	        # recurrent net, the remaining rows are its input sequence.
	        first_row_index = self.model.param_init_net.ConstantFill(
	            [], shape=[1], value=0
	        )
	        rnn_initial = net.Gather(
	            [padded_predictions, first_row_index],
	            core.ScopedBlobReference('rnn_initial'),
	            dense_gradient=True
	        )
	        remaining_rows, _ = net.RemovePadding(
	            [padded_predictions],
	            padding_width=1,
	            end_padding_width=0,
	            outputs=2,
	        )
	        rnn_input = net.ExpandDims(
	            [remaining_rows],
	            core.ScopedBlobReference('rnn_input_data'),
	            dims=[1]
	        )
	        # Due to a bug in RecurrentNetworkGradientOp, the transitions blob
	        # has to be copied before it is handed to the recurrent network
	        rnn_transitions = net.Copy(
	            snapshot, core.ScopedBlobReference('transitions_copy')
	        )
	        all_paths_score = self._crf_forward(
	            rnn_input, rnn_initial, rnn_transitions
	        )
	        return net.Sub(
	            [all_paths_score, labeled_path_score],
	            core.ScopedBlobReference('crf_loss')
	        )

	    def _pad_predictions(self, predictions):
	        """Return ``predictions`` extended with BOS and EOS entries.

	        Two columns filled with a very low score are appended to every row,
	        then a BOS row is put in front and an EOS row at the end. The BOS
	        row scores 0 only in column num_classes, the EOS row only in column
	        num_classes + 1; all their other entries hold the low score.
	        """
	        low_score = -1000.0  # An arbitrary very low number
	        net = self.model.net
	        init_net = self.model.param_init_net

	        bos_row = np.full(
	            (1, self.num_classes + 2), low_score, dtype=np.float32
	        )
	        bos_row[0, self.num_classes] = 0
	        eos_row = np.full(
	            (1, self.num_classes + 2), low_score, dtype=np.float32
	        )
	        eos_row[0, self.num_classes + 1] = 0

	        bos_blob = init_net.GivenTensorFill(
	            [], "b_scores", shape=[1, self.num_classes_padded], values=bos_row
	        )
	        eos_blob = init_net.GivenTensorFill(
	            [], "e_scores", shape=[1, self.num_classes_padded], values=eos_row
	        )

	        # Read the first dimension of the predictions at run time
	        first_dim_index = net.ConstantFill(
	            [], shape=[1, ], value=0
	        )
	        shape_blob = net.Shape([predictions])
	        num_rows = net.Gather([shape_blob, first_dim_index])
	        num_rows = net.Cast(num_rows, to='int32')
	        # One entry per row, all set to the low score, used as extra column
	        row_range = net.LengthsRangeFill(num_rows)
	        low_column = net.ConstantFill([row_range], value=low_score)
	        low_column = net.ExpandDims(low_column, dims=[1])
	        widened, _ = net.Concat(
	            [predictions, low_column, low_column],
	            outputs=2,
	            axis=1
	        )
	        with_markers, _ = net.Concat(
	            [bos_blob, widened, eos_blob],
	            outputs=2,
	            axis=0
	        )
	        return with_markers

	    def _pad_labels(self, labels):
	        """Return ``labels`` cast to int64 with BOS in front and EOS at the end.

	        BOS is encoded as num_classes and EOS as num_classes + 1.
	        """
	        init_net = self.model.param_init_net
	        net = self.model.net
	        bos_blob = init_net.ConstantFill(
	            [], shape=[1], value=self.num_classes
	        )
	        eos_blob = init_net.ConstantFill(
	            [], shape=[1], value=self.num_classes + 1
	        )
	        labels_int64 = net.Cast([labels], to='int64')
	        with_markers, _ = net.Concat(
	            [bos_blob, labels_int64, eos_blob],
	            axis=0,
	            outputs=2
	        )
	        return with_markers

	    def _path_binary_scores(self, labels, transitions, seq_lengths=None):
	        """Return a blob with the summed transition scores along ``labels``.

	        labels: padded labels blob; two RemovePadding outputs derived from it
	            serve as row and column ids into ``transitions``
	        transitions: transitions matrix blob
	        seq_lengths: accepted but not used here
	        """
	        net = self.model.net
	        # Column ids: labels with one entry of padding removed at the start
	        col_ids, _ = net.RemovePadding(
	            [labels],
	            outputs=2,
	            padding_width=1,
	            end_padding_width=0
	        )
	        # Row ids: labels with one entry of padding removed at the end
	        row_ids, _ = net.RemovePadding(
	            [labels],
	            outputs=2,
	            padding_width=0,
	            end_padding_width=1
	        )
	        # There is no multi-dimensional gather, so the matrix is flattened
	        # to a 1-d vector and looked up with row_id * num_columns + col_id
	        row_width = net.ConstantFill(
	            [row_ids],
	            value=self.num_classes_padded,
	        )
	        flat_ids = net.Mul([row_ids, row_width])
	        flat_ids = net.Add([flat_ids, col_ids])
	        flat_transitions = net.FlattenToVec([transitions])
	        picked = net.Gather(
	            [flat_transitions, flat_ids],
	            dense_gradient=True
	        )
	        # NOTE(review): invoked on the model rather than on model.net, unlike
	        # the other helpers - presumably the model forwards it; confirm.
	        return self.model.ReduceFrontSum(picked)

	    def _gather_entries_sum(self, in_data, indices, index_size):
	        """Return a blob with the sum of the ``in_data`` entries at ``indices``.

	        ``indices`` is cast to int64 and one-hot encoded with ``index_size``
	        classes; the flattened encoding is dot-multiplied with the flattened
	        ``in_data`` and the product is reduced with ReduceFrontSum.
	        """
	        net = self.model.net
	        indices_int64 = net.Cast([indices], to='int64')
	        size_blob = self.model.param_init_net.ConstantFill(
	            [],
	            shape=[1],
	            value=index_size,
	        )
	        one_hot = net.OneHot([indices_int64, size_blob])
	        flat_mask = net.FlattenToVec(one_hot)
	        flat_data = net.FlattenToVec(in_data)
	        picked_total = net.DotProduct([flat_mask, flat_data])
	        return net.ReduceFrontSum([picked_total])

	    def _crf_forward(
	        self,
	        input_blob,
	        initial_state,
	        transitions_copy,
	        seq_lengths=None
	    ):
	        """Return a blob with the accumulated score of all paths.

	        The last output of the recurrent CRF net is reshaped to a vector of
	        num_classes_padded entries and reduced with
	        SortedSegmentRangeLogSumExp, every entry carrying segment id 0. The
	        result is reshaped with shape=().

	        seq_lengths: accepted but not used
	        """
	        net = self.model.net
	        # Build the RNN net and take its last timestep output
	        last_cell = self.build_crf_net(
	            input_blob, initial_state, transitions_copy
	        )
	        last_cell, _ = net.Reshape(
	            [last_cell],
	            outputs=2,
	            shape=(self.num_classes_padded,)
	        )
	        segment_ids = self.model.param_init_net.ConstantFill(
	            [],
	            value=0,
	            shape=[self.num_classes_padded],
	            dtype=core.DataType.INT32,
	        )

	        # Accumulated total score of all the paths
	        total_score = net.SortedSegmentRangeLogSumExp(
	            [last_cell, segment_ids]
	        )
	        total_score, _ = net.Reshape(
	            total_score,
	            outputs=2,
	            shape=()
	        )
	        return total_score

	    def build_crf_net(self, input_blob, initial_state, transitions):
	        """Add the crf_net recurrent operator to the model.

	        input_blob: the input sequence in a format T x N x D
	            where T is sequence size, N - batch size and D - input dimension
	            ##Only supports batch-size 1##
	        initial_state: blob used as the initial input of the recurrent cell
	        transitions: transitions blob; declared as an external input of the
	            step net and added to the scores in every step

	        Returns the second blob returned by recurrent.recurrent_net (the
	        original code calls it out_last).
	        """
	        scope = 'crf_net'

	        def scoped(name):
	            # We have to manually scope due to our internal/external blob
	            # relationships.
	            return "{}/{}".format(str(scope), str(name))

	        step_model = model_helper.ModelHelper(name='crf_step',
	                                              param_model=self.model)
	        step_net = step_model.net
	        input_t, cell_t_prev, _ = (
	            step_net.AddExternalInputs(
	                core.ScopedBlobReference('input_t'),
	                core.ScopedBlobReference('cell_t_prev'),
	                transitions
	            )
	        )
	        segment_ids = step_model.param_init_net.ConstantFill(
	            [],
	            [scoped('zero_segment_id')],
	            value=0,
	            shape=[self.num_classes_padded],
	            dtype=core.DataType.INT32,
	        )

	        # A hack to bypass model cloning for test
	        step_model.param_init_net.AddExternalOutput(segment_ids)

	        # ---- the CRF step ----
	        # Tile the previous cell and the current input so they can be added
	        prev_transposed = brew.transpose(
	            step_model,
	            cell_t_prev,
	            [scoped('prev_transpose')],
	            axes=(0, 2, 1),
	        )
	        prev_tiled = step_net.Tile(
	            prev_transposed,
	            [scoped('prev_tiled')],
	            tiles=self.num_classes_padded,
	            axis=2,
	        )
	        input_tiled = step_net.Tile(
	            input_t,
	            [scoped('input_t_tiled')],
	            tiles=self.num_classes_padded,
	            axis=1,
	        )
	        prev_plus_input = step_net.Add(
	            [prev_tiled, input_tiled],
	            [scoped('input_with_prev')]
	        )
	        with_transitions = step_net.Add(
	            [prev_plus_input, transitions],
	            [scoped('prev_with_transitions')],
	            broadcast=1,
	            use_grad_hack=1,
	        )
	        with_transitions_2d, _ = step_net.Reshape(
	            with_transitions,
	            [
	                scoped('all_with_transitions_reshaped'),
	                scoped('all_with_transitions_orig'),
	            ],
	            shape=(self.num_classes_padded, self.num_classes_padded)
	        )
	        cell_t = step_net.SortedSegmentRangeLogSumExp(
	            [with_transitions_2d, segment_ids],
	            [scoped('cell_t')],
	        )
	        step_net.AddExternalOutputs(cell_t)

	        # ---- recurrent network ----
	        _, last_output = recurrent.recurrent_net(
	            net=self.model.net,
	            cell_net=step_net,
	            inputs=[(input_t, input_blob)],
	            initial_cell_inputs=[
	                (cell_t_prev, initial_state),
	            ],
	            links={
	                cell_t_prev: cell_t,
	            },
	            scope=scope,
	            outputs_with_grads=(1,)
	        )
	        return last_output

	    def update_predictions(self, classes):
	        """Add a Python op that re-ranks ``classes`` with Viterbi decoding.

	        ``classes`` is padded with BOS/EOS entries and fed, together with the
	        transitions blob, to a Python operator. For every step the operator
	        swaps the score of the currently best class with the score of the
	        class on the Viterbi path, so the latter becomes the highest one.

	        Returns the 'post_crf_classes' blob, which has the BOS/EOS rows and
	        columns removed again.
	        """
	        # Bind the plain function up front so that the registered operator
	        # does not need to capture self.
	        promote = self._promote_viterbi_path

	        def crf_update_predictions_op(inputs, outputs):
	            # inputs[0]: padded predictions, inputs[1]: transitions
	            orig_predictions = promote(inputs[0].data, inputs[1].data)
	            outputs[0].reshape(orig_predictions.shape)
	            outputs[0].data[...] = orig_predictions

	        padded_classes = self._pad_predictions(classes)
	        new_classes = self.model.net.Python(crf_update_predictions_op)(
	            [padded_classes, self.transitions],
	            core.ScopedBlobReference('post_crf_classes')
	        )
	        return new_classes

	    @staticmethod
	    def _promote_viterbi_path(predictions, transitions):
	        """Return re-ranked scores where the Viterbi class is the best one.

	        predictions: numpy array of padded scores, one row per step
	            (BOS row first, EOS row last, BOS/EOS as the last two columns)
	        transitions: square numpy array; entry [i, j] is added when moving
	            from class i to class j

	        The input arrays are left untouched. The result is a float64 array
	        without the first and last row and without the last two columns.
	        """
	        shape = predictions.shape
	        trellis = np.zeros(shape)
	        backpointers = np.zeros(shape, dtype=np.int32)
	        trellis[0] = predictions[0]

	        for t in range(1, shape[0]):
	            # v[i, j]: best score ending in class i at t - 1, then moving to j
	            v = np.expand_dims(trellis[t - 1], 1) + transitions
	            trellis[t] = predictions[t] + np.max(v, 0)
	            backpointers[t] = np.argmax(v, 0)

	        # Walk the backpointers from the best final class to the first step
	        viterbi = [np.argmax(trellis[-1])]
	        for bp in reversed(backpointers[1:]):
	            viterbi.append(bp[viterbi[-1]])
	        viterbi.reverse()

	        # Swap on a copy: the rows of `predictions` may be views of the
	        # operator's input tensor, which must not be modified.
	        reranked = np.array(predictions, dtype=np.float64)
	        for row, path_class in zip(reranked, viterbi):
	            old_best = np.argmax(row)
	            row[path_class], row[old_best] = row[old_best], row[path_class]

	        # Remove the BOS and EOS entries from the predictions matrix
	        return reranked[1:-1, 0:-2]