# Generates C++ autograd functions for the derivatives of ATen operations
#
# This writes two header/source pairs:
#  Functions.h/cpp: subclasses of autograd::Function
#  python_functions.h/cpp: Python bindings for the above classes
#
import re
from .utils import nested_dict, CodeTemplate, write
from .gen_autograd import VIEW_FUNCTIONS, template_path
from .utils import IDENT_REGEX
FUNCTIONS_H = CodeTemplate.from_file(template_path + '/Functions.h')
FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/Functions.cpp')
PY_FUNCTIONS_H = CodeTemplate.from_file(template_path + '/python_functions.h')
PY_FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/python_functions.cpp')
FUNCTION_DECLARATION = CodeTemplate("""\
struct ${op} : public ${superclass} {
  using ${superclass}::${superclass};
  variable_list apply(const variable_list& grads) override;
  std::string name() const override { return "${op}"; }
  void release_variables() override {
    ${release_variables}
  }
  ${will_release_variables}
  ${saved_variables}
  ${saved_list_sizes}
};
""")
WILL_RELEASE_VARIABLES = CodeTemplate("""\
bool retain_variables = true;
virtual void will_release_variables() override {
  retain_variables = false;
}
""")
FUNCTION_DEFINITION = CodeTemplate("""\
variable_list ${op}::apply(const variable_list& grads) {
  IndexRangeGenerator gen;
  ${compute_index_ranges}
  variable_list grad_inputs(gen.size());
  ${body}
  return grad_inputs;
}
""")
PY_FUNCTION_DEFINITION = CodeTemplate("""\
static PyTypeObject ${op}Class;
addClass<${op}>(${op}Class, "${op}");
""")
GRAD_INPUT_MASK = CodeTemplate("""\
  auto grad_input_mask = std::array<bool, ${n}>{
    ${masks}
  };\
""")
DERIVATIVE_SINGLE = CodeTemplate("""\
if (should_compute_output({ ${name}_ix })) {
  auto grad_result = ${derivative};
  copy_range(grad_inputs, ${name}_ix, grad_result);
}
""")
DERIVATIVE_MULTI = CodeTemplate("""\
if (should_compute_output({ ${idx_ranges} })) {
  ${grad_input_mask}
  auto grad_result = ${derivative};
  ${copy_ranges}
}
""")
# These functions have backwards which cannot be traced, and so their
# backward functions must be traced opaquely.
# VIEW_FUNCTIONS are not traceable because they use as_strided, which
# has an untraceable backwards, see
# https://github.com/pytorch/pytorch/issues/4250
# TODO: This is probably not exhaustive, but it's a start
UNTRACEABLE_FUNCTIONS = VIEW_FUNCTIONS

def gen_autograd_functions(out, autograd_functions):
    """Functions.h and Functions.cpp body

    These contain the auto-generated subclasses of torch::autograd::Function
    for every differentiable torch function.
    """
    function_definitions = []
    function_declarations = []
    py_function_initializers = []

    for func in autograd_functions:
        env = process_function(func)

        function_declarations.append(FUNCTION_DECLARATION.substitute(env))
        function_definitions.append(FUNCTION_DEFINITION.substitute(env))
        py_function_initializers.append(PY_FUNCTION_DEFINITION.substitute(env))

    top_env = {
        'autograd_function_definitions': function_definitions,
        'autograd_function_declarations': function_declarations,
        'py_function_initializers': py_function_initializers,
    }

    write(out, 'Functions.h', FUNCTIONS_H, top_env)
    write(out, 'Functions.cpp', FUNCTIONS_CPP, top_env)
    write(out, 'python_functions.h', PY_FUNCTIONS_H, top_env)
    write(out, 'python_functions.cpp', PY_FUNCTIONS_CPP, top_env)
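
# Each entry of `autograd_functions` is a dict produced earlier in the
# gen_autograd pipeline. The keys that process_function below relies on look
# roughly like this (an illustrative sketch; the names are made up and other
# keys are omitted):
#
#   {
#     'op': 'MulBackward',
#     'name': 'mul',
#     'args_with_gradients': [{'name': 'self', 'type': 'Tensor'},
#                             {'name': 'other', 'type': 'Tensor'}],
#     'saved_inputs': [{'name': 'self', 'type': 'Tensor'},
#                      {'name': 'other', 'type': 'Tensor'}],
#     'saved_outputs': [],
#     'derivatives': [{'formula': 'grad * other', 'var_names': ['self']},
#                     {'formula': 'grad * self', 'var_names': ['other']}],
#   }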
def process_function(func):
    env = {}
    saved_variables = []
    release_variables = []
    saved_list_sizes = []
    unpack = []

    env['compute_index_ranges'] = []
    for arg in func['args_with_gradients']:
        if arg['type'] == 'TensorList':
            size = '{}_size_'.format(arg['name'])
            saved_list_sizes.append('size_t {}_size_;'.format(arg['name']))
        else:
            size = '1'
        env['compute_index_ranges'].append('auto {}_ix = gen.range({});'.format(arg['name'], size))
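
    # For example (names made up), args 'self' (a Tensor) and 'tensors' (a
    # TensorList) would make this loop emit:
    #   auto self_ix = gen.range(1);
    #   auto tensors_ix = gen.range(tensors_size_);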
    def save_arg(arg, is_output):
        name = arg['name']
        if arg['type'] == 'Tensor' or (arg['type'] == 'Scalar' and is_output):
            saved_variables.append('SavedVariable {}_;'.format(name))
            release_variables.append('{}_.reset_data();'.format(name))
            ptr = 'shared_from_this()' if is_output else ''
            unpack.append('auto {} = {}_.unpack({});'.format(name, name, ptr))
        elif arg['type'] == 'TensorList':
            saved_variables.append('std::vector<SavedVariable> {}_;'.format(name))
            release_variables.append('{}_.clear();'.format(name))
            unpack.append('auto {} = unpack_list({}_);'.format(name, name))
        elif arg['type'] == 'IntList':
            saved_variables.append('std::vector<int64_t> {};'.format(name))
        else:
            saved_variables.append('{} {};'.format(arg['type'], name))
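
    # Illustratively, a saved Tensor input named 'other' gets a
    # 'SavedVariable other_;' member, an 'other_.reset_data();' release line,
    # and an 'auto other = other_.unpack();' unpack line; saved *outputs*
    # additionally pass shared_from_this() to unpack().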
    for arg in func['saved_inputs']:
        save_arg(arg, is_output=False)
    for arg in func['saved_outputs']:
        save_arg(arg, is_output=True)
    env['saved_variables'] = saved_variables
    env['release_variables'] = release_variables
    env['saved_list_sizes'] = saved_list_sizes

    if uses_retain_variables(func):
        env['will_release_variables'] = WILL_RELEASE_VARIABLES.substitute()
    else:
        env['will_release_variables'] = ''

    body = []

    if uses_single_grad(func):
        body.append('auto& grad = grads[0];')
    def emit_derivative(derivative):
        formula = derivative['formula']
        var_names = derivative['var_names']
        if len(var_names) == 1:
            return DERIVATIVE_SINGLE.substitute(name=var_names[0], derivative=formula)
        else:
            if 'grad_input_mask' in formula:
                masks = ['should_compute_output({{ {}_ix }}),'.format(n) for n in var_names]
                grad_input_mask = GRAD_INPUT_MASK.substitute(masks=masks, n=len(var_names))
            else:
                grad_input_mask = ''
            idx_ranges = ', '.join("{}_ix".format(n) for n in var_names)
            copy_ranges = []
            for i, n in enumerate(var_names):
                copy_ranges.append("copy_range(grad_inputs, {}_ix, std::get<{}>(grad_result));".format(n, i))
            return DERIVATIVE_MULTI.substitute(
                idx_ranges=idx_ranges, copy_ranges=copy_ranges,
                derivative=formula,
                grad_input_mask=grad_input_mask)
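
    # For a formula that fills several inputs at once (len(var_names) > 1,
    # e.g. var_names == ['self', 'other'], names illustrative), this emits a
    # single should_compute_output check over all the index ranges, an
    # optional grad_input_mask array, and one copy_range(...) call per
    # std::get<i>(grad_result) of the tuple-valued result.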
    body.extend(unpack)
    for derivative in func['derivatives']:
        body.append(emit_derivative(derivative))

    env['body'] = body

    if func['name'] in UNTRACEABLE_FUNCTIONS:
        env['superclass'] = 'Function'
    else:
        env['superclass'] = 'TraceableFunction'

    return nested_dict(env, func)
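
# The helpers below decide which optional pieces a generated Function needs by
# scanning the derivative formulas. IDENT_REGEX (from .utils) is expected to
# match `ident` only as a standalone identifier, so e.g. 'grad' should not
# match inside 'grad_input_mask'.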
def uses_ident(func, ident):
    if func is None:
        return False
    for derivative in func['derivatives']:
        formula = derivative['formula']
        if re.search(IDENT_REGEX.format(ident), formula):
            return True
    return False

def uses_retain_variables(func):
    return uses_ident(func, 'retain_variables')


def uses_single_grad(func):
    return uses_ident(func, 'grad')