Numba Cuda error

I have a simulation that has been running fine for years. I recently upgraded my system from Ubuntu 16 to Ubuntu 20. I also moved from Python 2.7 to Python 3.8.8.
My sims can take an hour or so to run, and I have started getting these crashes; they happen intermittently. I ran the sim 3 times today without crashing, and this time it crashed just after the halfway point.
Any ideas?

Traceback (most recent call last):
File “SelectableInvasionBufferArmsRace.py3”, line 625, in <module>
generateNextPop[blockspergrid, threadsperblock](...)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/compiler.py”, line 821, in call
return self.dispatcher.call(args, self.griddim, self.blockdim,
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/compiler.py”, line 966, in call
kernel.launch(args, griddim, blockdim, stream, sharedmem)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/compiler.py”, line 702, in launch
driver.device_to_host(ctypes.addressof(excval), excmem, excsz)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 2403, in device_to_host
fn(host_pointer(dst), device_pointer(src), size, *varargs)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemcpyDtoH results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR
Traceback (most recent call last):
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 642, in _exitfunc
f()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1804, in deref
mem.free()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1617, in free
self._finalizer()
File “/home/phill/anaconda3/lib/python3.8/weakref.py”, line 566, in call
return info.func(*info.args, **(info.kwargs or {}))
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 1285, in core
deallocations.add_item(driver.cuMemFree, handle, size)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 970, in add_item
self.clear()
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 981, in clear
dtor(handle)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 300, in safe_cuda_api_call
self._check_error(fname, retcode)
File “/home/phill/anaconda3/lib/python3.8/site-packages/numba/cuda/cudadrv/driver.py”, line 335, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [700] Call to cuMemFree results in UNKNOWN_CUDA_ERROR

Do you have a test suite for your simulation code? If so, does running it under compute-sanitizer show any errors?

You can run with:

compute-sanitizer python <args>

(You could run the whole simulation under it too, but I don’t know if that would be too time-consuming)

Thanks for the response. The error is intermittent; sometimes it will run successfully. Each run takes about 20 mins, and it can run a long time before crashing.
I’m trying to capture the conditions that lead to the error using exception handling, so far without success.
The program takes in a lot of random numbers, so I can’t reproduce the error yet.

It looks like the error might be reproducible. I did a
try:
do stuff()
on except:
do stuff()
and it got the same error again. So I might be able to write the data that led to the error to a file and see what’s up with that.