| # Copyright (c) Meta Platforms, Inc. and affiliates. |
| # All rights reserved. |
| # |
| # This source code is licensed under the BSD-style license found in the |
| # LICENSE file in the root directory of this source tree. |
| |
| import operator |
| from itertools import accumulate |
| from typing import cast |
| |
| import torch |
| from executorch.exir.backend.canonical_partitioners.config_partitioner import ( |
| format_target_name, |
| ) |
| |
# Canonical (overload-qualified) names of quantize ops recognized by this
# module, as produced by ``format_target_name``. Includes the torchao
# ``quantize_affine`` op alongside the decomposed per-tensor/channel/token ops.
_Q_OPS = {
    "quantize_per_tensor.tensor",
    "quantize_per_tensor.default",
    "quantize_per_channel.default",
    "quantize_per_channel_group.default",
    "quantize_per_token.default",
    "quantize_affine.default",
}

# Dequantize counterparts of ``_Q_OPS``; same naming convention.
_DQ_OPS = {
    "dequantize_per_tensor.tensor",
    "dequantize_per_tensor.default",
    "dequantize_per_channel.default",
    "dequantize_per_channel_group.default",
    "dequantize_per_token.default",
    "dequantize_affine.default",
}


# Ops that compute quantization parameters (scale/zero-point) at runtime.
_QPARAM_OPS = {
    "choose_qparams.tensor",
    "choose_qparams_per_token_asymmetric.default",
    "choose_qparams_affine.default",
}

# Q/DQ ops whose qparams are chosen dynamically at runtime (the ``.tensor``
# overloads and the per-token ops), as opposed to statically quantized ops.
_DYNAMIC_OPS = {
    "quantize_per_tensor.tensor",
    "quantize_per_token.default",
    "dequantize_per_tensor.tensor",
    "dequantize_per_token.default",
}
| |
| |
def is_dynamic_qdq(node: torch.fx.Node) -> bool:
    """Return True if ``node`` is a dynamically-quantized q/dq op.

    A node qualifies either by name (``_DYNAMIC_OPS``) or by being an affine
    q/dq that is per-token but not per-channel-group.
    """
    if node.op != "call_function":
        return False
    target_name = format_target_name(node.target.__name__)  # pyre-ignore
    if target_name in _DYNAMIC_OPS:
        return True
    # Affine q/dq is dynamic exactly when it is per-token without grouping.
    return is_per_token(node) and not is_per_channel_group(node)
| |
| |
def is_qparam(node: torch.fx.Node) -> bool:
    """Return True if ``node`` is a choose_qparams op (see ``_QPARAM_OPS``)."""
    if node.op != "call_function":
        return False
    target_name = format_target_name(node.target.__name__)  # pyre-ignore
    return target_name in _QPARAM_OPS
| |
| |
def is_quant(node: torch.fx.Node) -> bool:
    """Return True if ``node`` is one of the recognized quantize ops."""
    if node.op != "call_function":
        return False
    formatted = format_target_name(node.target.__name__)  # pyre-ignore
    return formatted in _Q_OPS
| |
| |
def is_dequant(node: torch.fx.Node) -> bool:
    """Return True if ``node`` is one of the recognized dequantize ops."""
    if node.op != "call_function":
        return False
    formatted = format_target_name(node.target.__name__)  # pyre-ignore
    return formatted in _DQ_OPS
| |
| |
def is_per_channel(node: torch.fx.Node) -> bool:
    """Return True if ``node`` is a per-channel quantize/dequantize op.

    Matches ops with ``per_channel`` in their name (which also covers the
    ``per_channel_group`` variants) as well as affine q/dq nodes whose block
    size describes per-channel grouping.
    """
    if not (is_quant(node) or is_dequant(node)):
        return False

    # Fixed: the original bound this to a local named ``is_per_channel``,
    # shadowing the enclosing function's own name.
    has_per_channel_name = "per_channel" in node.target.__name__  # pyre-ignore

    return has_per_channel_name or is_per_channel_group(node)
| |
| |
def is_affine_qdq(node: torch.fx.Node) -> bool:
    """Return True if ``node`` is a torchao affine quantize/dequantize op."""
    qdq = is_quant(node) or is_dequant(node)
    # Both quantize_affine and dequantize_affine contain this substring.
    return qdq and "quantize_affine" in node.target.__name__  # pyre-ignore
| |
| |
def _get_block_size_input_scale(node: torch.fx.Node):
    """Return ``(block_size, input_val, scale_val)`` for an affine q/dq node.

    ``block_size`` is the op's second positional arg; the fake-tensor values
    are pulled from the first two input nodes' ``meta["val"]``.
    """
    assert is_affine_qdq(node)
    inputs = node.all_input_nodes
    return node.args[1], inputs[0].meta["val"], inputs[1].meta["val"]
| |
| |
def is_per_token(node: torch.fx.Node):
    """Return True if ``node`` quantizes/dequantizes per token.

    Matches explicit ``per_token`` ops, and affine q/dq nodes whose block
    size is 1 on every leading dim and spans the full last dim — with one
    scale per token (product of the leading input dims).
    """
    if not (is_quant(node) or is_dequant(node)):
        return False

    if "per_token" in node.target.__name__:  # pyre-ignore
        return True
    elif is_affine_qdq(node):
        block_size, input_val, scale_val = _get_block_size_input_scale(node)
        # All leading block dims must be 1; expected scale count is the
        # product of the corresponding input dims.
        matches = all(b == 1 for b in block_size[:-1])
        expected_scales = 1
        for dim in range(len(block_size) - 1):
            expected_scales *= input_val.shape[dim]
        # Deliberately non-short-circuiting (&=) to mirror the original flow.
        matches &= block_size[-1] == input_val.shape[-1]
        matches &= scale_val.numel() == expected_scales
        return matches

    return False
| |
| |
def is_per_channel_group(node: torch.fx.Node):
    """Return True if ``node`` quantizes/dequantizes per channel group.

    Matches explicit ``per_channel_group`` ops, and affine q/dq nodes with a
    2-D block size of ``[1, group_size]`` where the scales tile the input
    exactly (``input_numel == group_size * scale_numel``).
    """
    if not (is_quant(node) or is_dequant(node)):
        return False

    if "per_channel_group" in node.target.__name__:  # pyre-ignore
        return True
    elif is_affine_qdq(node):
        block_size, input_val, scale_val = _get_block_size_input_scale(node)
        matches = len(block_size) == 2
        matches &= block_size[0] == 1
        group_size = block_size[1]
        # Total element counts via a running product over the shapes.
        scale_count = list(accumulate(scale_val.shape, operator.mul))[-1]
        input_count = list(accumulate(input_val.shape, operator.mul))[-1]
        matches &= input_count == group_size * scale_count
        return matches

    return False
| |
| |
def extract_qdq_affine_op_args_for_decomposed_ops(node: torch.fx.Node):
    """Map a ``quantize_affine``/``dequantize_affine`` node's args onto the
    argument list expected by the decomposed q/dq ops.

    On success returns
    ``[input, scale, zero_point, quant_min, quant_max, dtype(, group_size), last_arg]``;
    returns ``(None, None)`` when ``node`` is not an affine q/dq op, its
    target dtype is not int8, or its zero-point domain is not "INT".

    NOTE(review): the failure path returns a 2-tuple while the success path
    returns a single list — callers must handle both shapes; left unchanged
    here since callers are not visible.
    """
    if not is_affine_qdq(node):
        return None, None
    # Affine q/dq positional layout:
    # (input, block_size, scale, zero_point, dtype, [quant_min, quant_max, ...])
    input_node = node.args[0]
    scale_node = node.args[2]
    zero_point_node = node.args[3]
    args = [input_node, scale_node, zero_point_node]
    # args[0..4] are accessed unconditionally, so at least 5 args are required.
    # (Fixed: the message previously claimed "at least 6" while the check is > 4.)
    assert (
        len(node.args) > 4
    ), f"expecting at least 5 args, got node: {node.format_node()}"

    # Only an int8 target dtype is supported by the decomposed ops.
    if node.args[4] != torch.int8:
        return None, None
    target_dtype = cast(torch.dtype, node.args[4])

    if len(node.args) > 6:
        # Explicit quant_min / quant_max were provided — pass them through.
        args.append(node.args[5])
        args.append(node.args[6])
    else:
        # Default to the full representable range of the target dtype.
        dtype_info = torch.iinfo(target_dtype)
        args.append(dtype_info.min)
        args.append(dtype_info.max)

    # Target dtype goes after quant_min/quant_max in the decomposed signature.
    args.append(target_dtype)
    # Only the integer zero-point domain is supported.
    if len(node.args) > 7 and node.args[7] != "INT":
        return None, None

    if is_per_channel_group(node):
        # Group size is the last element of the 2-D block size.
        block_sizes = cast(list[int], node.args[1])
        args.append(block_sizes[-1])

    # Forward the node's final argument unchanged.
    args.append(node.args[-1])

    return args