When I call a cuda.jit function in a njit function, then the following error appeared.
numba.core.errors.TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Untyped global name 'random_3d': Cannot determine Numba type of <class 'numba.cuda.compiler.Dispatcher'>
@cuda.jit
def random_3d(arr, rng_states):
# Per-dimension thread indices and strides
startx, starty, startz = cuda.grid(3)
stridex, stridey, stridez = cuda.gridsize(3)
# Linearized thread index
tid = (startz * stridey * stridex) + (starty * stridex) + startx
# Use strided loops over the array to assign a random value to each entry
for i in range(startz, arr.shape[0], stridez):
for j in range(starty, arr.shape[1], stridey):
for k in range(startx, arr.shape[2], stridex):
arr[i, j, k] = xoroshiro128p_uniform_float32(rng_states, tid)
@numba.njit(cache=True)
def cal(hdrs):
_, W, H, C = hdrs.shape
# Array dimensions
X, Y, Z = 256, 256, 256
# Block and grid dimensions
bx, by, bz = 8, 8, 8
gx, gy, gz = 16, 16, 16
# Total number of threads
nthreads = bx * by * bz * gx * gy * gz
# Initialize a state for each thread
rng_states = _create_xoroshiro128p_states(nthreads, seed=1)
rand_nums_gpu = cuda.device_array((X, Y, Z), dtype=np.float32)
random_3d[(gx, gy, gz), (bx, by, bz)](rand_nums_gpu, rng_states)