| # Owner(s): ["module: onnx"] |
| from __future__ import annotations |
| |
| import tempfile |
| |
| import onnx |
| import pytorch_test_common |
| import torch |
| import transformers # type: ignore[import] |
| from torch import nn |
| from torch._subclasses import fake_tensor |
| from torch.nn import functional as F |
| from torch.onnx import dynamo_export, ExportOptions |
| from torch.onnx._internal.diagnostics import infra |
| from torch.onnx._internal.fx import diagnostics, registration |
| from torch.testing._internal import common_utils |
| |
| |
| def assert_has_diagnostics( |
| diagnostic_context: diagnostics.DiagnosticContext, |
| rule: infra.Rule, |
| level: infra.Level, |
| expected_node: str, |
| ): |
    rule_level_pair = (rule.id, level.name.lower())
| sarif_log = diagnostic_context.sarif_log() |
| actual_results = [] |
| for run in sarif_log.runs: |
| if run.results is None: |
| continue |
| for result in run.results: |
| id_level_pair = (result.rule_id, result.level) |
| actual_results.append(id_level_pair) |
| if ( |
                rule_level_pair == id_level_pair
| and result.message.text |
| and result.message.markdown |
| and expected_node in result.message.text |
| ): |
| return |
| |
    raise AssertionError(
        f"Expected a diagnostic result with rule id and level pair {rule_level_pair} "
        f"and node {expected_node}, but none was found. "
        f"Actual diagnostic results: {actual_results}"
    )
| |
| |
| @common_utils.instantiate_parametrized_tests |
| class TestFxToOnnx(pytorch_test_common.ExportTestCase): |
| def setUp(self): |
| super().setUp() |
| self.export_options = ExportOptions() |
| |
| def tearDown(self): |
| super().tearDown() |
| |
| def test_simple_function(self): |
| def func(x): |
| y = x + 1 |
| z = y.relu() |
| return (y, z) |
| |
| _ = dynamo_export( |
| func, torch.randn(1, 1, 2), export_options=self.export_options |
| ) |
| |
| def test_empty(self): |
        # Since `torch.empty` returns a tensor with uninitialized data, we cannot
        # test it under `test_fx_to_onnx_with_onnxruntime.py` with result comparison.
| def func(x): |
| return torch.empty(x.size(), dtype=torch.int64) |
| |
| tensor_x = torch.randn(1, 1, 2) |
| _ = dynamo_export(func, tensor_x, export_options=self.export_options) |
| |
| def test_args_used_for_export_is_not_converted_to_fake_tensors(self): |
| def func(x, y): |
| return x + y |
| |
| tensor_x = torch.randn(1, 1, 2) |
| tensor_y = torch.randn(1, 1, 2) |
| _ = dynamo_export(func, tensor_x, tensor_y, export_options=self.export_options) |
| self.assertNotIsInstance(tensor_x, fake_tensor.FakeTensor) |
| self.assertNotIsInstance(tensor_y, fake_tensor.FakeTensor) |
| |
| @common_utils.parametrize( |
| "diagnostic_rule", |
| [ |
| common_utils.subtest( |
| diagnostics.rules.find_opschema_matched_symbolic_function, |
| name="optional_inputs", |
| ), |
| common_utils.subtest( |
| diagnostics.rules.op_level_debugging, |
| name="get_attr_node_in_op_level_debug", |
| ), |
| ], |
| ) |
| def test_mnist_exported_with_no_warnings(self, diagnostic_rule): |
| class MNISTModel(nn.Module): |
| def __init__(self): |
| super().__init__() |
| self.conv1 = nn.Conv2d(1, 32, 3, 1, bias=False) |
| self.conv2 = nn.Conv2d(32, 64, 3, 1, bias=False) |
| self.fc1 = nn.Linear(9216, 128, bias=False) |
| self.fc2 = nn.Linear(128, 10, bias=False) |
| |
| def forward(self, tensor_x: torch.Tensor): |
| tensor_x = self.conv1(tensor_x) |
| tensor_x = F.sigmoid(tensor_x) |
| tensor_x = self.conv2(tensor_x) |
| tensor_x = F.sigmoid(tensor_x) |
| tensor_x = F.max_pool2d(tensor_x, 2) |
| tensor_x = torch.flatten(tensor_x, 1) |
| tensor_x = self.fc1(tensor_x) |
| tensor_x = F.sigmoid(tensor_x) |
| tensor_x = self.fc2(tensor_x) |
| output = F.log_softmax(tensor_x, dim=1) |
| return output |
| |
| tensor_x = torch.rand((64, 1, 28, 28), dtype=torch.float32) |
| export_output = dynamo_export( |
| MNISTModel(), tensor_x, export_options=ExportOptions(op_level_debug=True) |
| ) |
| |
| assert_has_diagnostics( |
| export_output.diagnostic_context, |
| diagnostic_rule, |
| diagnostics.levels.NONE, |
| expected_node="aten.convolution.default", |
| ) |
| |
| def test_trace_only_op_with_evaluator(self): |
| model_input = torch.tensor([[1.0, 2.0, 3.0], [1.0, 1.0, 2.0]]) |
| |
| class ArgminArgmaxModel(torch.nn.Module): |
| def forward(self, input): |
| return ( |
| torch.argmin(input), |
| torch.argmax(input), |
| torch.argmin(input, keepdim=True), |
| torch.argmax(input, keepdim=True), |
| torch.argmin(input, dim=0, keepdim=True), |
| torch.argmax(input, dim=1, keepdim=True), |
| ) |
| |
| _ = dynamo_export( |
| ArgminArgmaxModel(), model_input, export_options=self.export_options |
| ) |
| |
| def test_multiple_outputs_op_with_evaluator(self): |
| class TopKModel(torch.nn.Module): |
| def forward(self, x): |
| values, _ = torch.topk(x, 3) |
| return torch.sum(values) |
| |
| x = torch.arange(1.0, 6.0, requires_grad=True) |
        _ = dynamo_export(TopKModel(), x, export_options=self.export_options)
| |
| def test_unsupported_indices_fake_tensor_generated_with_op_level_debug(self): |
| class EmbedModelWithoutPaddingIdx(torch.nn.Module): |
| def forward(self, input, emb): |
| return torch.nn.functional.embedding(input, emb) |
| |
| model = EmbedModelWithoutPaddingIdx() |
| x = torch.randint(4, (4, 3, 2)) |
| embedding_matrix = torch.rand(10, 3) |
| |
| export_output = dynamo_export( |
| model, |
| x, |
| embedding_matrix, |
| export_options=ExportOptions(op_level_debug=True), |
| ) |
| assert_has_diagnostics( |
| export_output.diagnostic_context, |
| diagnostics.rules.op_level_debugging, |
| diagnostics.levels.WARNING, |
| expected_node="aten.embedding.default", |
| ) |
| |
| def test_unsupported_function_schema_raises_diagnostic_warning_when_found_nearest_match( |
| self, |
| ): |
| class TraceModel(torch.nn.Module): |
| def forward(self, input): |
| return input.new_zeros(()) |
| |
| x = torch.randn((2, 3), dtype=torch.float32) |
| export_output = dynamo_export(TraceModel(), x) |
| |
| assert_has_diagnostics( |
| export_output.diagnostic_context, |
| diagnostics.rules.find_opschema_matched_symbolic_function, |
| diagnostics.levels.WARNING, |
| expected_node="aten.new_zeros.default", |
| ) |
| |
| def test_perfect_match_on_sequence_and_bool_attributes( |
| self, |
| ): |
| class TraceModel(torch.nn.Module): |
| def __init__(self): |
| super().__init__() |
| self.conv2 = torch.nn.Conv2d( |
| 16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1) |
| ) |
| |
| def forward(self, input): |
| return self.conv2(input) |
| |
| x = torch.randn(20, 16, 50, 50) |
| export_output = dynamo_export( |
| TraceModel(), x, export_options=ExportOptions(op_level_debug=False) |
| ) |
| assert_has_diagnostics( |
| export_output.diagnostic_context, |
| diagnostics.rules.find_opschema_matched_symbolic_function, |
| diagnostics.levels.NONE, |
| expected_node="aten.convolution.default", |
| ) |
| |
| def test_dispatch_overload_fall_back_default_raise_diagnostic_warning(self): |
| class TraceModel(torch.nn.Module): |
| def forward(self, input): |
| return torch.ops.aten.add.Tensor(input, input) |
| |
| onnx_registry = torch.onnx.OnnxRegistry() |
| self.assertTrue( |
| onnx_registry.is_registered_op( |
| namespace="aten", op_name="add", overload="Tensor" |
| ) |
| ) |
        # TODO: Replace this example with a torch custom op when overload is supported.
        # Currently, torch only supports custom ops with a namespace and op_name.
| aten_add_Tensor = registration.OpName.from_name_parts( |
| namespace="aten", op_name="add", overload="Tensor" |
| ) |
| onnx_registry._registry.pop(aten_add_Tensor) |
| |
| x = torch.tensor(3) |
| export_output = dynamo_export( |
| TraceModel(), x, export_options=ExportOptions(onnx_registry=onnx_registry) |
| ) |
| assert_has_diagnostics( |
| export_output.diagnostic_context, |
| diagnostics.rules.find_operator_overloads_in_onnx_registry, |
| diagnostics.levels.WARNING, |
| expected_node="aten.add.Tensor", |
| ) |
| |
| def test_aten_clone_does_not_raise_warning_of_lack_of_memory_format(self): |
| class CustomModule(torch.nn.Module): |
| def forward(self, input): |
| return torch.ops.aten.clone(input, memory_format=torch.preserve_format) |
| |
| x = torch.tensor(3) |
| export_output = dynamo_export(CustomModule(), x) |
| assert_has_diagnostics( |
| export_output.diagnostic_context, |
| diagnostics.rules.find_opschema_matched_symbolic_function, |
| diagnostics.levels.NONE, |
| expected_node="aten.clone.default", |
| ) |
| |
| def test_dynamo_export_retains_readable_parameter_and_buffer_names(self): |
| class SubModule(torch.nn.Module): |
| def __init__(self): |
| super().__init__() |
| self.conv2 = nn.Conv2d(32, 64, 3, 1, bias=False) |
| self.fc1 = nn.Linear(9216, 128, bias=False) |
| self.register_buffer("buffer", torch.randn(1, 128)) |
| |
| def forward(self, tensor_x: torch.Tensor): |
| tensor_x = self.conv2(tensor_x) |
| tensor_x = F.sigmoid(tensor_x) |
| tensor_x = F.max_pool2d(tensor_x, 2) |
| tensor_x = torch.flatten(tensor_x, 1) |
| tensor_x = self.fc1(tensor_x) |
| tensor_x = tensor_x + self.buffer |
| tensor_x = F.sigmoid(tensor_x) |
| return tensor_x |
| |
| class MNISTModel(nn.Module): |
| def __init__(self): |
| super().__init__() |
| self.conv1 = nn.Conv2d(1, 32, 3, 1, bias=False) |
| self.submodule = SubModule() |
| self.fc2 = nn.Linear(128, 10, bias=False) |
| |
| def forward(self, tensor_x: torch.Tensor): |
| tensor_x = self.conv1(tensor_x) |
| tensor_x = F.sigmoid(tensor_x) |
| tensor_x = self.submodule(tensor_x) |
| tensor_x = self.fc2(tensor_x) |
| output = F.log_softmax(tensor_x, dim=1) |
| return output |
| |
| tensor_x = torch.rand((64, 1, 28, 28), dtype=torch.float32) |
| |
| model = MNISTModel() |
| export_output = torch.onnx.dynamo_export(model, tensor_x) |
| model_proto = export_output.model_proto |
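        # The exported initializers should keep the original parameter and buffer
        # names, i.e. match the model's state_dict keys exactly.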
| self.assertEqual( |
| {initializer.name for initializer in model_proto.graph.initializer}, |
| {*model.state_dict().keys()}, |
| ) |
| |
| def test_fake_tensor_mode_simple(self): |
| class Model(torch.nn.Module): |
| def __init__(self) -> None: |
| super().__init__() |
| self.linear = torch.nn.Linear(2, 2) |
| |
| def forward(self, x): |
| out = self.linear(x) |
| return out |
| |
| with torch.onnx.enable_fake_mode() as fake_context: |
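            # Everything created here is a FakeTensor, so the model can be exported
            # without allocating real memory for its parameters.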
| x = torch.rand(5, 2, 2) |
| model = Model() |
| export_options = ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, x, export_options=export_options |
| ) |
| |
| assert ( |
| export_output is not None |
| ), "ExportOutput must be created on successful export" |
| assert ( |
| export_output.model_proto is not None |
| ), "A model protobuf must be created on a successful export" |
| onnx.checker.check_model(export_output.model_proto, full_check=True) |
| assert ( |
| len(export_output.model_proto.graph.initializer) == 0 |
| ), "Initializers cannot exist when fake mode is enabled" |
| |
| # Variant 1: Save ONNX proto using Model's state_dict() |
| with tempfile.NamedTemporaryFile(suffix=".onnx") as tmp_onnx_file: |
| model_state_dict = Model().state_dict() # Create a state_dict for testing |
| export_output.save(tmp_onnx_file.name, model_state_dict=model_state_dict) |
| assert ( |
| len(onnx.load(tmp_onnx_file.name).graph.initializer) == 2 |
| ), "Initializers must be present after loading it from model_state_dict" |
| |
| # Variant 2: Save ONNX proto using Model checkpoint file |
| with tempfile.NamedTemporaryFile( |
| suffix=".onnx" |
| ) as tmp_onnx_file, tempfile.NamedTemporaryFile( |
| suffix=".pt" |
| ) as tmp_checkpoint_file: |
| torch.save( |
| Model().state_dict(), tmp_checkpoint_file.name |
| ) # Create checkpoint file for testing |
| export_output.save( |
| tmp_onnx_file.name, model_state_dict=tmp_checkpoint_file.name |
| ) |
| assert ( |
| len(onnx.load(tmp_onnx_file.name).graph.initializer) == 2 |
| ), "Initializers must be present after loading it from model_state_dict" |
| |
| def test_fake_tensor_mode_simple_invalid_input(self): |
| class Model(torch.nn.Module): |
| def __init__(self) -> None: |
| super().__init__() |
| self.linear = torch.nn.Linear(2, 2) |
| |
| def forward(self, x): |
| out = self.linear(x) |
| return out |
| |
| real_model = Model() |
| real_x = torch.rand(5, 2, 2) |
| with torch.onnx.enable_fake_mode() as fake_context: |
| fake_model = Model() |
| fake_x = torch.rand(5, 2, 2) |
| |
| # TODO: Split each scenario on its own test case |
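        # Any mismatch between fake/real models, inputs, and the fake_context that
        # created them is invalid and must raise OnnxExporterError.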
| # Scenario 1: Fake model and fake input WITHOUT ExportOptions(fake_context=...) |
| with self.assertRaises(torch.onnx.OnnxExporterError): |
| export_options = ExportOptions(fake_context=None) |
| _ = torch.onnx.dynamo_export( |
| fake_model, fake_x, export_options=export_options |
| ) |
| |
| # Scenario 2: Fake model and real input WITHOUT fake_context |
| with self.assertRaises(torch.onnx.OnnxExporterError): |
| export_options = ExportOptions(fake_context=None) |
| _ = torch.onnx.dynamo_export( |
| fake_model, real_x, export_options=export_options |
| ) |
| |
| # Scenario 3: Real model and real input WITH fake_context |
| with self.assertRaises(torch.onnx.OnnxExporterError): |
| export_options = ExportOptions(fake_context=fake_context) |
| _ = torch.onnx.dynamo_export( |
| real_model, real_x, export_options=export_options |
| ) |
| |
| # Scenario 4: Fake model and real input WITH fake_context |
| with self.assertRaises(torch.onnx.OnnxExporterError): |
| export_options = ExportOptions(fake_context=fake_context) |
| _ = torch.onnx.dynamo_export( |
| fake_model, real_x, export_options=export_options |
| ) |
| |
    # NOTE: For all transformers models, a config is preferred over a pre-trained
    # model for testing because:
    # 1. Pre-trained models are too big for CI
    # 2. Pre-trained models have a uint8/bool issue: https://github.com/huggingface/transformers/issues/21013
| def test_fake_tensor_mode_huggingface_gpt2(self): |
| config = transformers.GPT2Config( |
| vocab_size=8096, n_positions=256, n_embd=256, n_layer=2, n_head=2 |
| ) |
| batch, seq = 4, 256 |
| |
| with torch.onnx.enable_fake_mode() as fake_context: |
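            # Instantiate the model and inputs under fake mode so no real weights
            # are materialized; the exported proto then contains no initializers.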
| model = transformers.GPT2Model(config).eval() |
| input_ids = torch.randint(0, config.vocab_size, (batch, seq)) |
| attention_mask = torch.ones(batch, seq, dtype=torch.bool) |
| position_ids = torch.arange(0, seq, dtype=torch.long) |
| position_ids = position_ids.unsqueeze(0).view(-1, seq) |
| |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, |
| input_ids=input_ids, |
| attention_mask=attention_mask, |
| position_ids=position_ids, |
| export_options=export_options, |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
| def test_fake_tensor_mode_huggingface_bigscience_bloom(self): |
| config = transformers.BloomConfig() |
| batch, seq = 4, 256 |
| |
| with torch.onnx.enable_fake_mode() as fake_context: |
| model = transformers.BloomModel(config).eval() |
| input_ids = torch.randint(0, config.vocab_size, (batch, seq)) |
| attention_mask = torch.ones(batch, seq, dtype=torch.bool) |
| |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, |
| input_ids=input_ids, |
| attention_mask=attention_mask, |
| export_options=export_options, |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
| def test_fake_tensor_mode_huggingface_open_llama(self): |
| config = transformers.OpenLlamaConfig( |
| vocab_size=8096, hidden_size=256, num_hidden_layers=2, num_attention_heads=2 |
| ) |
| batch, seq = 4, 256 |
| |
| with torch.onnx.enable_fake_mode() as fake_context: |
| model = transformers.OpenLlamaModel(config).eval() |
| input_ids = torch.randint(0, config.vocab_size, (batch, seq)) |
| attention_mask = torch.ones(batch, seq, dtype=torch.bool) |
| position_ids = torch.arange(0, seq, dtype=torch.long) |
| position_ids = position_ids.unsqueeze(0).view(-1, seq) |
| |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, |
| input_ids=input_ids, |
| attention_mask=attention_mask, |
| position_ids=position_ids, |
| export_options=export_options, |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
| def test_fake_tensor_mode_huggingface_google_t5(self): |
| config = transformers.T5Config( |
| vocab_size=8096, d_model=256, num_layers=2, num_heads=2 |
| ) |
| device = "cpu" |
| batch, seq = 4, 256 |
| with torch.onnx.enable_fake_mode() as fake_context: |
| model = transformers.T5Model(config).to(device).eval() |
| input_ids = torch.randint(0, config.vocab_size, (batch, seq)) |
| attention_mask = torch.ones((batch, seq), dtype=torch.bool) |
| decoder_input_ids = torch.randint(0, config.vocab_size, (batch, seq)) |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, |
| input_ids=input_ids, |
| attention_mask=attention_mask, |
| decoder_input_ids=decoder_input_ids, |
| export_options=export_options, |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
| def test_fake_tensor_mode_huggingface_openai_whisper(self): |
| config = transformers.WhisperConfig() |
| feature_extractor = transformers.WhisperFeatureExtractor() |
| device = "cpu" |
| batch = 4 |
| with torch.onnx.enable_fake_mode() as fake_context: |
| input_features = torch.randn( |
| ( |
| batch, |
| feature_extractor.feature_size, |
| feature_extractor.nb_max_frames, |
| ), |
| dtype=torch.float32, |
| ) |
| decoder_input_ids = torch.tensor([[1, 1]]) * config.decoder_start_token_id |
| model = transformers.AutoModel.from_config(config).to(device).eval() |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, |
| input_features, |
| decoder_input_ids=decoder_input_ids, |
| export_options=export_options, |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
| # TODO: From Config/Model |
    @pytorch_test_common.skip_in_ci(
        "Not decorated with xfail because CI doesn't have enough memory to run and then fail. "
        "SymFloat in OnnxFunction attribute is not supported yet."
    )
| def test_fake_tensor_mode_huggingface_databricks_dolly_v2_3b(self): |
| # TODO: Make this test work with config |
| # Dolly has no config on transformers |
| model_name = "databricks/dolly-v2-3b" |
| device = "cpu" |
| with torch.onnx.enable_fake_mode() as fake_context: |
| tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) |
| inputs = tokenizer("Hello world!", return_tensors="pt") |
| model = transformers.AutoModel.from_pretrained(model_name).to(device).eval() |
| |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, **inputs, export_options=export_options |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
    @pytorch_test_common.skip_in_ci(
        "Not decorated with xfail because CI doesn't have enough memory to run and then fail. "
        "AssertionError: Mutating module attribute seq_len_cached during export: "
        "self.seq_len_cached = seq_len"
    )
| def test_fake_tensor_mode_huggingface_tiiuae_falcon(self): |
| config = transformers.FalconConfig() |
| batch, seq = 4, 256 |
| |
| with torch.onnx.enable_fake_mode() as fake_context: |
| model = transformers.FalconModel(config).eval() |
| input_ids = torch.randint(0, config.vocab_size, (batch, seq)) |
| attention_mask = torch.ones(batch, seq, dtype=torch.bool) |
| |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, |
| input_ids=input_ids, |
| attention_mask=attention_mask, |
| export_options=export_options, |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
    @pytorch_test_common.skip_in_ci(
        "Not decorated with xfail because CI doesn't have enough memory to run and then fail. "
        "torch._dynamo.exc.UserError: Dynamic control flow is not supported at the moment. "
        "Please use functorch.experimental.control_flow.cond to explicitly capture the control flow."
    )
| def test_fake_tensor_mode_huggingface_mosaicml_mpt_7b(self): |
| # TODO: Make this test work with config |
| # mpt-7b has no config on transformers |
| model_name = "mosaicml/mpt-7b" |
| device = "cpu" |
| with torch.onnx.enable_fake_mode() as fake_context: |
| tokenizer = transformers.AutoTokenizer.from_pretrained( |
| model_name, trust_remote_code=True |
| ) |
| inputs = tokenizer("Hello world!", return_tensors="pt") |
| model = ( |
| transformers.AutoModelForCausalLM.from_pretrained( |
| model_name, trust_remote_code=True |
| ) |
| .to(device) |
| .eval() |
| ) |
| |
| export_options = torch.onnx.ExportOptions(fake_context=fake_context) |
| export_output = torch.onnx.dynamo_export( |
| model, **inputs, export_options=export_options |
| ) |
| onnx.checker.check_model(export_output.model_proto) |
| onnx.shape_inference.infer_shapes(export_output.model_proto) |
| |
| |
| if __name__ == "__main__": |
| common_utils.run_tests() |