achieved_occupancy |
sm__warps_active.avg.pct_of_peak_sustained_active |
atomic_transactions |
l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum + l1tex__t_set_accesses_pipe_lsu_mem_global_op_red.sum |
atomic_transactions_per_request |
(l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum) / (l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum + l1tex__t_requests_pipe_lsu_mem_global_op_red.sum) |
branch_efficiency |
smsp__sass_average_branch_targets_threads_uniform.pct |
cf_executed |
smsp__inst_executed_pipe_cbu.sum + smsp__inst_executed_pipe_adu.sum |
cf_fu_utilization |
n/a |
cf_issued |
n/a |
double_precision_fu_utilization |
smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active |
dram_read_bytes |
dram__bytes_read.sum |
dram_read_throughput |
dram__bytes_read.sum.per_second |
dram_read_transactions |
dram__sectors_read.sum |
dram_utilization |
dram__throughput.avg.pct_of_peak_sustained_elapsed |
dram_write_bytes |
dram__bytes_write.sum |
dram_write_throughput |
dram__bytes_write.sum.per_second |
dram_write_transactions |
dram__sectors_write.sum |
eligible_warps_per_cycle |
smsp__warps_eligible.sum.per_cycle_active |
flop_count_dp |
smsp__sass_thread_inst_executed_op_dadd_pred_on.sum + smsp__sass_thread_inst_executed_op_dmul_pred_on.sum + smsp__sass_thread_inst_executed_op_dfma_pred_on.sum * 2 |
flop_count_dp_add |
smsp__sass_thread_inst_executed_op_dadd_pred_on.sum |
flop_count_dp_fma |
smsp__sass_thread_inst_executed_op_dfma_pred_on.sum |
flop_count_dp_mul |
smsp__sass_thread_inst_executed_op_dmul_pred_on.sum |
flop_count_hp |
smsp__sass_thread_inst_executed_op_hadd_pred_on.sum + smsp__sass_thread_inst_executed_op_hmul_pred_on.sum + smsp__sass_thread_inst_executed_op_hfma_pred_on.sum * 2 |
flop_count_hp_add |
smsp__sass_thread_inst_executed_op_hadd_pred_on.sum |
flop_count_hp_fma |
smsp__sass_thread_inst_executed_op_hfma_pred_on.sum |
flop_count_hp_mul |
smsp__sass_thread_inst_executed_op_hmul_pred_on.sum |
flop_count_sp |
smsp__sass_thread_inst_executed_op_fadd_pred_on.sum + smsp__sass_thread_inst_executed_op_fmul_pred_on.sum + smsp__sass_thread_inst_executed_op_ffma_pred_on.sum * 2 |
flop_count_sp_add |
smsp__sass_thread_inst_executed_op_fadd_pred_on.sum |
flop_count_sp_fma |
smsp__sass_thread_inst_executed_op_ffma_pred_on.sum |
flop_count_sp_mul |
smsp__sass_thread_inst_executed_op_fmul_pred_on.sum |
flop_count_sp_special |
n/a |
flop_dp_efficiency |
smsp__sass_thread_inst_executed_ops_dadd_dmul_dfma_pred_on.avg.pct_of_peak_sustained_elapsed |
flop_hp_efficiency |
smsp__sass_thread_inst_executed_ops_hadd_hmul_hfma_pred_on.avg.pct_of_peak_sustained_elapsed |
flop_sp_efficiency |
smsp__sass_thread_inst_executed_ops_fadd_fmul_ffma_pred_on.avg.pct_of_peak_sustained_elapsed |
gld_efficiency |
smsp__sass_average_data_bytes_per_sector_mem_global_op_ld.pct |
gld_requested_throughput |
n/a |
gld_throughput |
l1tex__t_bytes_pipe_lsu_mem_global_op_ld.sum.per_second |
gld_transactions |
l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum |
gld_transactions_per_request |
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio |
global_atomic_requests |
l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum |
global_hit_rate |
(l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red_lookup_hit.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_atom_lookup_hit.sum) / (l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum) |
global_load_requests |
l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum |
global_reduction_requests |
l1tex__t_requests_pipe_lsu_mem_global_op_red.sum |
global_store_requests |
l1tex__t_requests_pipe_lsu_mem_global_op_st.sum |
gst_efficiency |
smsp__sass_average_data_bytes_per_sector_mem_global_op_st.pct |
gst_requested_throughput |
n/a |
gst_throughput |
l1tex__t_bytes_pipe_lsu_mem_global_op_st.sum.per_second |
gst_transactions |
l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum |
gst_transactions_per_request |
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio |
half_precision_fu_utilization |
smsp__inst_executed_pipe_fp16.avg.pct_of_peak_sustained_active |
inst_bit_convert |
smsp__sass_thread_inst_executed_op_conversion_pred_on.sum |
inst_compute_ld_st |
smsp__sass_thread_inst_executed_op_memory_pred_on.sum |
inst_control |
smsp__sass_thread_inst_executed_op_control_pred_on.sum |
inst_executed |
smsp__inst_executed.sum |
inst_executed_global_atomics |
smsp__sass_inst_executed_op_global_atom.sum |
inst_executed_global_loads |
smsp__inst_executed_op_global_ld.sum |
inst_executed_global_reductions |
smsp__inst_executed_op_global_red.sum |
inst_executed_global_stores |
smsp__inst_executed_op_global_st.sum |
inst_executed_local_loads |
smsp__inst_executed_op_local_ld.sum |
inst_executed_local_stores |
smsp__inst_executed_op_local_st.sum |
inst_executed_shared_atomics |
smsp__inst_executed_op_shared_atom.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum |
inst_executed_shared_loads |
smsp__inst_executed_op_shared_ld.sum |
inst_executed_shared_stores |
smsp__inst_executed_op_shared_st.sum |
inst_executed_surface_atomics |
smsp__inst_executed_op_surface_atom.sum |
inst_executed_surface_loads |
smsp__inst_executed_op_surface_ld.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum |
inst_executed_surface_reductions |
smsp__inst_executed_op_surface_red.sum |
inst_executed_surface_stores |
smsp__inst_executed_op_surface_st.sum |
inst_executed_tex_ops |
smsp__inst_executed_op_texture.sum |
inst_fp_16 |
smsp__sass_thread_inst_executed_op_fp16_pred_on.sum |
inst_fp_32 |
smsp__sass_thread_inst_executed_op_fp32_pred_on.sum |
inst_fp_64 |
smsp__sass_thread_inst_executed_op_fp64_pred_on.sum |
inst_integer |
smsp__sass_thread_inst_executed_op_integer_pred_on.sum |
inst_inter_thread_communication |
smsp__sass_thread_inst_executed_op_inter_thread_communication_pred_on.sum |
inst_issued |
smsp__inst_issued.sum |
inst_misc |
smsp__sass_thread_inst_executed_op_misc_pred_on.sum |
inst_per_warp |
smsp__average_inst_executed_per_warp.ratio |
inst_replay_overhead |
n/a |
ipc |
smsp__inst_executed.avg.per_cycle_active |
issue_slot_utilization |
smsp__issue_active.avg.pct_of_peak_sustained_active |
issue_slots |
smsp__inst_issued.sum |
issued_ipc |
smsp__inst_issued.avg.per_cycle_active |
l1_sm_lg_utilization |
l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active |
l2_atomic_throughput |
2 * ( lts__t_sectors_op_atom.sum.per_second + lts__t_sectors_op_red.sum.per_second ) |
l2_atomic_transactions |
2 * ( lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum ) |
l2_global_atomic_store_bytes |
lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_atom.sum |
l2_global_load_bytes |
lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_ld.sum |
l2_local_global_store_bytes |
lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_st.sum + lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_st.sum |
l2_local_load_bytes |
lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_ld.sum |
l2_read_throughput |
lts__t_sectors_op_read.sum.per_second + lts__t_sectors_op_atom.sum.per_second + lts__t_sectors_op_red.sum.per_second |
l2_read_transactions |
lts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum |
l2_surface_load_bytes |
lts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_ld.sum |
l2_surface_store_bytes |
lts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_st.sum |
l2_tex_hit_rate |
lts__t_sector_hit_rate.pct |
l2_tex_read_hit_rate |
lts__t_sector_op_read_hit_rate.pct |
l2_tex_read_throughput |
lts__t_sectors_srcunit_tex_op_read.sum.per_second |
l2_tex_read_transactions |
lts__t_sectors_srcunit_tex_op_read.sum |
l2_tex_write_hit_rate |
lts__t_sector_op_write_hit_rate.pct |
l2_tex_write_throughput |
lts__t_sectors_srcunit_tex_op_write.sum.per_second |
l2_tex_write_transactions |
lts__t_sectors_srcunit_tex_op_write.sum |
l2_utilization |
lts__t_sectors.avg.pct_of_peak_sustained_elapsed |
l2_write_throughput |
lts__t_sectors_op_write.sum.per_second + lts__t_sectors_op_atom.sum.per_second + lts__t_sectors_op_red.sum.per_second |
l2_write_transactions |
lts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum |
ldst_executed |
n/a |
ldst_fu_utilization |
smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active |
ldst_issued |
n/a |
local_hit_rate |
n/a |
local_load_requests |
l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum |
local_load_throughput |
l1tex__t_bytes_pipe_lsu_mem_local_op_ld.sum.per_second |
local_load_transactions |
l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum |
local_load_transactions_per_request |
l1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_ld.ratio |
local_memory_overhead |
n/a |
local_store_requests |
l1tex__t_requests_pipe_lsu_mem_local_op_st.sum |
local_store_throughput |
l1tex__t_bytes_pipe_lsu_mem_local_op_st.sum.per_second |
local_store_transactions |
l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum |
local_store_transactions_per_request |
l1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_st.ratio |
nvlink_data_receive_efficiency |
n/a |
nvlink_data_transmission_efficiency |
n/a |
nvlink_overhead_data_received |
(nvlrx__bytes_data_protocol.sum / nvlrx__bytes.sum) * 100 |
nvlink_overhead_data_transmitted |
(nvltx__bytes_data_protocol.sum / nvltx__bytes.sum) * 100 |
nvlink_receive_throughput |
nvlrx__bytes.sum.per_second |
nvlink_total_data_received |
nvlrx__bytes.sum |
nvlink_total_data_transmitted |
nvltx__bytes.sum |
nvlink_total_nratom_data_transmitted |
n/a |
nvlink_total_ratom_data_transmitted |
n/a |
nvlink_total_response_data_received |
n/a |
nvlink_total_write_data_transmitted |
n/a |
nvlink_transmit_throughput |
nvltx__bytes.sum.per_second |
nvlink_user_data_received |
nvlrx__bytes_data_user.sum |
nvlink_user_data_transmitted |
nvltx__bytes_data_user.sum |
nvlink_user_nratom_data_transmitted |
n/a |
nvlink_user_ratom_data_transmitted |
n/a |
nvlink_user_response_data_received |
n/a |
nvlink_user_write_data_transmitted |
n/a |
pcie_total_data_received |
pcie__read_bytes.sum |
pcie_total_data_transmitted |
pcie__write_bytes.sum |
shared_efficiency |
smsp__sass_average_data_bytes_per_wavefront_mem_shared.pct |
shared_load_throughput |
l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum.per_second |
shared_load_transactions |
l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum |
shared_load_transactions_per_request |
n/a |
shared_store_throughput |
l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum.per_second |
shared_store_transactions |
l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum |
shared_store_transactions_per_request |
n/a |
shared_utilization |
l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed |
single_precision_fu_utilization |
smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active |
sm_efficiency |
smsp__cycles_active.avg.pct_of_peak_sustained_elapsed |
sm_tex_utilization |
l1tex__texin_sm2tex_req_cycles_active.avg.pct_of_peak_sustained_elapsed |
special_fu_utilization |
smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active |
stall_constant_memory_dependency |
smsp__warp_issue_stalled_imc_miss_per_warp_active.pct |
stall_exec_dependency |
smsp__warp_issue_stalled_short_scoreboard_per_warp_active.pct + smsp__warp_issue_stalled_wait_per_warp_active.pct |
stall_inst_fetch |
smsp__warp_issue_stalled_no_instruction_per_warp_active.pct |
stall_memory_dependency |
smsp__warp_issue_stalled_long_scoreboard_per_warp_active.pct |
stall_memory_throttle |
smsp__warp_issue_stalled_drain_per_warp_active.pct + smsp__warp_issue_stalled_lg_throttle_per_warp_active.pct |
stall_not_selected |
smsp__warp_issue_stalled_not_selected_per_warp_active.pct |
stall_other |
smsp__warp_issue_stalled_dispatch_stall_per_warp_active.pct + smsp__warp_issue_stalled_misc_per_warp_active.pct |
stall_pipe_busy |
smsp__warp_issue_stalled_math_pipe_throttle_per_warp_active.pct + smsp__warp_issue_stalled_mio_throttle_per_warp_active.pct |
stall_sleeping |
smsp__warp_issue_stalled_sleeping_per_warp_active.pct |
stall_sync |
smsp__warp_issue_stalled_barrier_per_warp_active.pct + smsp__warp_issue_stalled_membar_per_warp_active.pct |
stall_texture |
smsp__warp_issue_stalled_tex_throttle_per_warp_active.pct |
surface_atomic_requests |
l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum |
surface_load_requests |
l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum |
surface_reduction_requests |
l1tex__t_requests_pipe_tex_mem_surface_op_red.sum |
surface_store_requests |
l1tex__t_requests_pipe_tex_mem_surface_op_st.sum |
sysmem_read_bytes |
lts__t_sectors_aperture_sysmem_op_read.sum * 32 |
sysmem_read_throughput |
lts__t_sectors_aperture_sysmem_op_read.sum.per_second |
sysmem_read_transactions |
lts__t_sectors_aperture_sysmem_op_read.sum |
sysmem_read_utilization |
n/a |
sysmem_utilization |
n/a |
sysmem_write_bytes |
lts__t_sectors_aperture_sysmem_op_write.sum * 32 |
sysmem_write_throughput |
lts__t_sectors_aperture_sysmem_op_write.sum.per_second |
sysmem_write_transactions |
lts__t_sectors_aperture_sysmem_op_write.sum |
sysmem_write_utilization |
n/a |
tensor_precision_fu_utilization |
sm__pipe_tensor_op_hmma_cycles_active.avg.pct_of_peak_sustained_active |
tensor_precision_int_utilization |
sm__pipe_tensor_op_imma_cycles_active.avg.pct_of_peak_sustained_active (SM 7.2+) |
tex_cache_hit_rate |
l1tex__t_sector_hit_rate.pct |
tex_cache_throughput |
n/a |
tex_cache_transactions |
l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active + l1tex__tex_writeback_active.avg.pct_of_peak_sustained_active |
tex_fu_utilization |
smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active |
tex_sm_tex_utilization |
l1tex__f_tex2sm_cycles_active.avg.pct_of_peak_sustained_elapsed |
tex_sm_utilization |
sm__mio2rf_writeback_active.avg.pct_of_peak_sustained_elapsed |
tex_utilization |
n/a |
texture_load_requests |
l1tex__t_requests_pipe_tex_mem_texture.sum |
warp_execution_efficiency |
smsp__thread_inst_executed_per_inst_executed.ratio |
warp_nonpred_execution_efficiency |
smsp__thread_inst_executed_per_inst_executed.pct |