|  | import tempfile | 
|  | import contextlib | 
|  | from . import cudart, check_error | 
|  |  | 
|  |  | 
|  | DEFAULT_FLAGS = [ | 
|  | "gpustarttimestamp", | 
|  | "gpuendtimestamp", | 
|  | "gridsize3d", | 
|  | "threadblocksize", | 
|  | "streamid", | 
|  | "enableonstart 0", | 
|  | "conckerneltrace", | 
|  | ] | 
|  |  | 
|  |  | 
|  | def init(output_file, flags=None, output_mode='key_value'): | 
|  | rt = cudart() | 
|  | if not hasattr(rt, 'cudaOutputMode'): | 
|  | raise AssertionError("HIP does not support profiler initialization!") | 
|  | flags = DEFAULT_FLAGS if flags is None else flags | 
|  | if output_mode == 'key_value': | 
|  | output_mode_enum = rt.cudaOutputMode.KeyValuePair | 
|  | elif output_mode == 'csv': | 
|  | output_mode_enum = rt.cudaOutputMode.CSV | 
|  | else: | 
|  | raise RuntimeError("supported CUDA profiler output modes are: key_value and csv") | 
|  | with tempfile.NamedTemporaryFile(delete=True) as f: | 
|  | f.write(b'\n'.join(f.encode('ascii') for f in flags)) | 
|  | f.flush() | 
|  | check_error(rt.cudaProfilerInitialize(f.name, output_file, output_mode_enum)) | 
|  |  | 
|  |  | 
|  | def start(): | 
|  | check_error(cudart().cudaProfilerStart()) | 
|  |  | 
|  |  | 
|  | def stop(): | 
|  | check_error(cudart().cudaProfilerStop()) | 
|  |  | 
|  |  | 
|  | @contextlib.contextmanager | 
|  | def profile(): | 
|  | try: | 
|  | start() | 
|  | yield | 
|  | finally: | 
|  | stop() |