Hi everyone. I think I found a bug in Numba, and I created a reproducible demo below. The kernel should allocate only 256 KB of local memory, but it allocates more than 2 GB. Could someone take a look?
import numba as nb
from numba import cuda
from time import sleep
######################
@cuda.jit("void(int32[:])", device=True)
def GPU_device_function(arr):
    """No-op device function; receiving the local array is part of the repro."""
    return
@cuda.jit("void()")
def GPU_entry_point():
    """Kernel reproducing the excessive local-memory allocation.

    All three pieces below (the early-return branch, the element write,
    and the device-function call) are needed to trigger the problem;
    removing any one of them makes the kernel behave normally.
    """
    # When this if is removed, it works normally
    if cuda.grid(1):
        return
    # Should use only 256 KB of memory.
    arr = cuda.local.array(shape=65536, dtype=nb.int32)
    # When this assignment is removed, it works normally
    arr[0] = 0
    # When this call is removed, it works normally
    GPU_device_function(arr)
######################
if __name__ == '__main__':
    # Bind to the first CUDA device and show which one was picked.
    print(cuda.select_device(0))
    print("LOADED")
    # Launch a single thread in a single block, then wait for completion.
    GPU_entry_point[1, 1]()  # Run once
    cuda.synchronize()
    print("DONE")
    # Pause so the memory spike is visible in Task Manager before deallocation.
    sleep(3)
    print("END")