torch_npu.profiler.profile

接口原型

torch_npu.profiler.profile(activities=None, schedule=None, on_trace_ready=None, record_shapes=False, profile_memory=False, with_stack=False, with_flops=False, with_modules=False, experimental_config=None, use_cuda=None)

功能描述

提供对训练过程进行性能数据采集（profiling）的功能。

参数说明

调用示例

# Usage example for torch_npu.profiler.profile.
# NOTE(review): `steps`, `train_one_step`, `train_loader`, `model`,
# `optimizer` and `criterion` are not defined in this snippet; presumably
# they come from the surrounding training script.

# Experimental options object, handed to profile() through its
# `experimental_config` keyword argument below.
experimental_config = torch_npu.profiler._ExperimentalConfig(
    aic_metrics=torch_npu.profiler.AiCMetrics.PipeUtilization,
    profiler_level=torch_npu.profiler.ProfilerLevel.Level1,
    l2_cache=False,
)

# Activities requested from the profiler: both CPU and NPU.
profiled_activities = [
    torch_npu.profiler.ProfilerActivity.CPU,
    torch_npu.profiler.ProfilerActivity.NPU,
]

# Step schedule passed as the `schedule` argument.
step_schedule = torch_npu.profiler.schedule(
    wait=0, warmup=0, active=1, repeat=1, skip_first=1
)

# Callback passed as `on_trace_ready`; it is constructed with "./result".
trace_handler = torch_npu.profiler.tensorboard_trace_handler("./result")

with torch_npu.profiler.profile(
    activities=profiled_activities,
    schedule=step_schedule,
    on_trace_ready=trace_handler,
    record_shapes=True,
    profile_memory=True,
    with_stack=True,
    with_flops=False,
    with_modules=False,
    experimental_config=experimental_config,
) as prof:
    for step in range(steps):
        train_one_step(step, steps, train_loader, model, optimizer, criterion)
        # prof.step() is called once per loop iteration.
        prof.step()