Hi,
Suppose I want to generate and print the PTX for the following device function:
from numba import cuda, int32
def add(a, b):
    """Return the sum of ``a`` and ``b``.

    This is the plain Python function handed to ``cuda.compile_ptx`` below
    to be compiled as a device function.
    """
    # The body must be indented; at column 0 the snippet is an IndentationError.
    return a + b
# Compile `add` as a device function taking two int32 arguments, targeting
# compute capability 6.0. The first element of the result is the PTX text.
arg_types = (int32, int32)
compiled = cuda.compile_ptx(add, arg_types, device=True, cc=(6, 0))
ptx = compiled[0]
print(ptx)
That will print the following:
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-29190527
// Cuda compilation tools, release 11.1, V11.1.105
// Based on LLVM 3.4svn
//
.version 7.1
.target sm_60
.address_size 64
// .globl _ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii
.common .global .align 8 .u64 _ZN08NumbaEnv8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii;
.visible .func (.param .b32 func_retval0) _ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii(
.param .b64 _ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii_param_0,
.param .b32 _ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii_param_1,
.param .b32 _ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii_param_2
)
{
.reg .b32 %r<2>;
.reg .b64 %rd<5>;
ld.param.u64 %rd1, [_ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii_param_0];
ld.param.s32 %rd2, [_ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii_param_1];
ld.param.s32 %rd3, [_ZN8__main__7add_241B82cw51cXTLSUwv1kAPW1tQPAP9CY9GJAHUqIFJIBltW60OjnB1KwVoKKiDnAeKfyQhmB_2bRhID6gTzjWk0AEii_param_2];
add.s64 %rd4, %rd3, %rd2;
st.u64 [%rd1], %rd4;
mov.u32 %r1, 0;
st.param.b32 [func_retval0+0], %r1;
ret;
}
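My question is: why is an extra parameter (the `.b64` `_param_0` in the listing above) added as the first argument of the generated function, ahead of the two `.b32` parameters for `a` and `b`, and what is its purpose?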
Best Regards,
Chris