# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import gc

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor

from executorch.backends.qualcomm.utils.utils import (
    generate_qnn_executorch_option,
    update_spill_fill_size,
)


def preprocess_binary(ctx_bin, compiler_specs):
    """Wrap a raw QNN context binary into the binary-info blob produced by
    QnnManager.MakeBinaryInfo.
    """
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(compiler_specs),
    )
    return bytes(qnn_mgr.MakeBinaryInfo(ctx_bin))
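
# Example usage (hypothetical sketch): `compiler_specs` stands in for whatever
# the caller already passes to generate_qnn_executorch_option, and `shard.bin`
# for a pre-built context binary on disk.
#
#   with open("shard.bin", "rb") as f:
#       binary_info = preprocess_binary(f.read(), compiler_specs)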


def get_encoding(
    path_to_shard: str,
    compiler_specs: str,
    get_input: bool,
    get_output: bool,
    num_input: int,
    num_output: int,
):
    """Load a pre-built context binary shard and collect the per-tensor
    quantization encodings (scale / offset) of its graph inputs and/or outputs.

    Returns a list holding up to two dicts: input encodings first (if
    requested), then output encodings (if requested).
    """
    encoding_list = []
    with open(path_to_shard, "rb") as f:
        ctx_bin = preprocess_binary(f.read(), compiler_specs)
        qnn_mgr = PyQnnManagerAdaptor.QnnManager(
            generate_qnn_executorch_option(compiler_specs), ctx_bin
        )
        assert qnn_mgr.Init().value == 0, "failed to load context binary"
        graph_name = qnn_mgr.GetGraphNames()[0]
        qnn_mgr.AllocateTensor(graph_name)
        if get_input:
            encoding_input = {"scale": [], "offset": []}
            for i in range(num_input):
                inputs = qnn_mgr.GetGraphInputs(graph_name)[i]
                encoding = inputs.GetEncodings()
                encoding_input["scale"].append(encoding.data["scale"].item())
                encoding_input["offset"].append(encoding.data["offset"].item())
            encoding_list.append(encoding_input)
        if get_output:
            encoding_output = {"scale": [], "offset": []}
            for i in range(num_output):
                outputs = qnn_mgr.GetGraphOutputs(graph_name)[i]
                encoding = outputs.GetEncodings()
                encoding_output["scale"].append(encoding.data["scale"].item())
                encoding_output["offset"].append(encoding.data["offset"].item())
            encoding_list.append(encoding_output)
        qnn_mgr.Destroy()
    return encoding_list
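
# Example usage (hypothetical sketch): read quantization encodings from a shard
# with two graph inputs and one graph output. The shard path and
# `compiler_specs` are placeholders supplied by the caller.
#
#   encodings = get_encoding(
#       path_to_shard="shard_0.bin",
#       compiler_specs=compiler_specs,
#       get_input=True,
#       get_output=True,
#       num_input=2,
#       num_output=1,
#   )
#   input_scales, input_offsets = encodings[0]["scale"], encodings[0]["offset"]
#   output_scales, output_offsets = encodings[1]["scale"], encodings[1]["offset"]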


def gen_pte_from_ctx_bin(artifact, pte_names, bundle_programs, backend_config):
    """Lower each bundled edge program manager into a .pte file under `artifact`.

    Programs are consumed front-to-back and released right after being written
    so that host memory stays bounded while exporting multiple shards.
    """
    edge_prog_mgrs = [prog["edge_program_manager"] for prog in bundle_programs]
    # Set up the spill-fill buffer to relieve runtime memory usage
    update_spill_fill_size(
        [
            prog_mgr._edge_programs[list(prog_mgr.methods)[0]]
            for prog_mgr in edge_prog_mgrs
        ]
    )
    # Export .pte files; each iteration lowers the first remaining program
    pte_files = []
    for pte_name in pte_names:
        print(f"Generating {pte_name}...")
        pte_files.append(f"{artifact}/{pte_name}.pte")
        with open(pte_files[-1], "wb") as f:
            edge_prog_mgrs[0].to_executorch(config=backend_config).write_to_file(f)
        # Drop the processed program and collect garbage to reduce host memory
        # consumption
        bundle_programs.pop(0)
        edge_prog_mgrs.pop(0)
        gc.collect()

    return pte_files
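
# Example usage (hypothetical sketch): `bundle_programs` is assumed to be a list
# of dicts, each holding an "edge_program_manager" entry, ordered to match
# `pte_names`; `backend_config` is the ExecutorchBackendConfig chosen by the
# caller.
#
#   pte_files = gen_pte_from_ctx_bin(
#       artifact="./artifacts",
#       pte_names=["shard_0", "shard_1"],
#       bundle_programs=bundle_programs,
#       backend_config=backend_config,
#   )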