blob: 67d519a688e3d981287a4d99bb8dd0a51396c3ad [file] [log] [blame]
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import gc
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor
from executorch.backends.qualcomm.utils.utils import (
canonicalize_program,
generate_qnn_executorch_option,
)
from executorch.exir.backend.backend_api import to_backend
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
def _collect_encodings(tensors, count: int):
    """Gather the quantization encodings of the first ``count`` tensors.

    Args:
        tensors: Indexable collection of tensor wrappers, each exposing
            ``GetEncodings()``.
        count: Number of leading tensors to read.

    Returns:
        A dict ``{"scale": [...], "offset": [...]}`` with one entry per tensor,
        each value obtained through ``.item()`` on the encoding data.

    Raises:
        IndexError: If ``count`` exceeds the number of available tensors.
    """
    collected = {"scale": [], "offset": []}
    # Index explicitly (rather than slicing) so that asking for more tensors
    # than exist still fails loudly instead of being silently truncated.
    for idx in range(count):
        encoding = tensors[idx].GetEncodings()
        collected["scale"].append(encoding.data["scale"].item())
        collected["offset"].append(encoding.data["offset"].item())
    return collected


def get_encoding(
    path_to_shard: str,
    compiler_specs: str,
    get_input: bool,
    get_output: bool,
    num_input: int,
    num_output: int,
):
    """Read scale/offset encodings of graph I/O tensors from a context binary.

    The file at ``path_to_shard`` is read as raw bytes and handed to a
    ``QnnManager`` together with the option generated from ``compiler_specs``.

    Args:
        path_to_shard: Path of the context binary to load.
        compiler_specs: Forwarded unchanged to
            ``generate_qnn_executorch_option``.
            NOTE(review): annotated as ``str`` here; confirm the type that
            helper actually expects.
        get_input: When true, collect encodings of the graph inputs.
        get_output: When true, collect encodings of the graph outputs.
        num_input: Number of leading graph inputs to inspect.
        num_output: Number of leading graph outputs to inspect.

    Returns:
        A list holding, in this order, the input encodings (if ``get_input``)
        and the output encodings (if ``get_output``). Each element is a dict
        of the form ``{"scale": [...], "offset": [...]}``.

    Raises:
        AssertionError: If the manager's ``Init()`` reports a non-zero status.
        IndexError: If ``num_input`` / ``num_output`` exceed the number of
            tensors returned by the manager.
    """
    with open(path_to_shard, "rb") as f:
        ctx_bin = f.read()

    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(compiler_specs), ctx_bin
    )
    # Init() has side effects, so it must not live inside an ``assert``
    # statement: those are stripped under ``python -O``, which would skip the
    # initialization entirely. AssertionError is kept as the raised type so
    # existing callers observe the same exception.
    if qnn_mgr.Init().value != 0:
        raise AssertionError("failed to load context binary")

    encoding_list = []
    try:
        qnn_mgr.AllocateTensor()
        if get_input:
            encoding_list.append(
                _collect_encodings(qnn_mgr.GetGraphInputs(), num_input)
            )
        if get_output:
            encoding_list.append(
                _collect_encodings(qnn_mgr.GetGraphOutputs(), num_output)
            )
    finally:
        # Always release the manager, even when reading the encodings fails.
        qnn_mgr.Destroy()
    return encoding_list
def gen_pte_from_ctx_bin(
    artifact, pte_names, compiler_specs, bundle_programs, custom_spill_fill=None
):
    """Lower bundled programs to QnnBackend and write one .pte file per name.

    Args:
        artifact: Directory prefix used to build each output path.
        pte_names: Iterable of base names; each yields
            ``{artifact}/{pte_name}.pte``.
        compiler_specs: Passed to ``to_backend`` for every program.
        bundle_programs: List of mappings providing an ``"edge_program"``
            entry. NOTE: the first element is popped after each file is
            written, so the caller's list is consumed by this call.
        custom_spill_fill: Forwarded to ``canonicalize_program`` as
            ``custom_buffer_size``.

    Returns:
        The list of written file paths, in the order of ``pte_names``.
    """
    # Delegate every bundled edge program to QnnBackend
    lowered_modules = [
        to_backend("QnnBackend", bundle["edge_program"], compiler_specs)
        for bundle in bundle_programs
    ]
    # Setup spill-fill buffer for relieving runtime memory usage
    canonicalize_program(lowered_modules, custom_buffer_size=custom_spill_fill)

    # Serialize the lowered modules one at a time, always taking the head
    written_paths = []
    for pte_name in pte_names:
        print(f"{pte_name} generating...")
        pte_path = f"{artifact}/{pte_name}.pte"
        written_paths.append(pte_path)
        planner = MemoryPlanningPass(
            memory_planning_algo="greedy",
            alloc_graph_input=False,
            alloc_graph_output=False,
        )
        with open(pte_path, "wb") as pte_file:
            # Serialize directly into write() so no extra reference to the
            # buffer outlives this statement.
            pte_file.write(
                lowered_modules[0].buffer(
                    extract_delegate_segments=True, memory_planning=planner
                )
            )
        # Drop the finished program and module, then collect, to keep host
        # memory consumption down
        bundle_programs.pop(0)
        del lowered_modules[0]
        gc.collect()
    return written_paths