OK, almost there, but I'm hitting a problem.
It appears that the type checking in numba.cuda.CUDADispatcher.call() kicks off at the line `kernel = _dispatcher.Dispatcher._cuda_call(self, *args)` and rejects anything that is not a recognized Numba type before the custom extension handler has a chance to modify the type; the handler only runs later, at the line `kernel.launch(args, griddim, blockdim, stream, sharedmem)`.
Does this mean that in `kernel[1,1](var)`,
`var` has to be of a type recognizable by Numba to start with?
I’m probably missing something…
The “extension-less” solution in the post above does the job for now.
Thanks Graham @gmarkall!
import numpy as np
import ctypes
import numpy as np
from PIL import Image
from cuda import cudart
from cuda.cudart import cudaGraphicsRegisterFlags as GLFlags
import numba.cuda as nbcuda
import moderngl as mgl
class ModernGL_VBO_Handler:
    """Numba CUDA launch extension that turns ModernGL buffers into device arrays.

    An instance is meant to be passed in the ``extensions`` list of
    ``numba.cuda.jit``. Its ``prepare_args`` hook replaces any ``mgl.Buffer``
    argument with a ``DeviceNDArray`` that views the buffer's memory through
    CUDA/OpenGL interop; every other argument is passed through untouched.

    NOTE: as observed in the surrounding discussion, the dispatcher type-checks
    the arguments before ``prepare_args`` is invoked, so an ``mgl.Buffer``
    argument may be rejected before this hook ever runs.
    """

    def __init__(self, dtype) -> None:
        """Store the element type used to interpret mapped buffers.

        Args:
            dtype: anything accepted by ``np.dtype`` (e.g. ``np.float32``).
        """
        self.dtype = np.dtype(dtype)

    @staticmethod
    def _check(err, what):
        """Raise ``RuntimeError`` if a CUDA runtime call did not succeed."""
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError(f"{what} failed: {err}")

    def _ptr_to_DeviceNDArray(self, ptr, shape, strides=None, order="C"):
        """Wrap a raw device pointer in a ``DeviceNDArray`` of ``self.dtype``.

        Args:
            ptr: device address as an integer.
            shape: array shape (an int or a tuple of ints), in elements.
            strides: optional strides; derived from ``shape`` and ``order``
                when ``None``.
            order: memory layout passed on to Numba's shape/stride helper.

        Returns:
            A ``DeviceNDArray`` viewing the memory at ``ptr`` (no copy is made).
        """
        shape, strides, dtype = nbcuda.api.prepare_shape_strides_dtype(
            shape, strides, self.dtype, order
        )
        datasize = nbcuda.driver.memory_size_from_info(shape, strides, dtype.itemsize)
        ctx = nbcuda.current_context()
        c_ptr = ctypes.c_uint64(ptr)
        mem = nbcuda.driver.MemoryPointer(ctx, c_ptr, datasize)
        return nbcuda.cudadrv.devicearray.DeviceNDArray(
            shape, strides, dtype, gpu_data=mem
        )

    def _vbo_to_DeviceNDArray(self, vbo):
        """Register and map an OpenGL buffer with CUDA and view it as an array.

        Args:
            vbo: a ModernGL buffer; its OpenGL object id (``vbo.glo``) is
                registered with the CUDA runtime.

        Returns:
            A one-dimensional ``DeviceNDArray`` of ``self.dtype`` covering the
            mapped buffer.

        Raises:
            RuntimeError: if any of the CUDA graphics-interop calls fails.
        """
        err, gl_resource = cudart.cudaGraphicsGLRegisterBuffer(
            vbo.glo, GLFlags.cudaGraphicsRegisterFlagsNone
        )
        self._check(err, "cudaGraphicsGLRegisterBuffer")
        (err,) = cudart.cudaGraphicsMapResources(1, gl_resource, 0)
        self._check(err, "cudaGraphicsMapResources")
        err, dev_ptr, dev_buff_size = cudart.cudaGraphicsResourceGetMappedPointer(
            gl_resource
        )
        self._check(err, "cudaGraphicsResourceGetMappedPointer")
        # The mapped size is reported in bytes; the array shape is in elements.
        n_items = dev_buff_size // self.dtype.itemsize
        # NOTE(review): the resource stays registered and mapped after this
        # call; presumably it should be unmapped/unregistered once the kernel
        # has finished and before OpenGL uses the buffer again -- confirm.
        return self._ptr_to_DeviceNDArray(dev_ptr, n_items)

    def prepare_args(self, ty, val, **kwargs):
        """Kernel-launch hook: swap ``mgl.Buffer`` arguments for device arrays.

        Args:
            ty: the Numba type inferred for the argument.
            val: the argument value.
            **kwargs: extra launch information supplied by Numba (unused).

        Returns:
            A ``(type, value)`` pair. For ``mgl.Buffer`` values this is the
            Numba type of the wrapping device array and the array itself;
            otherwise ``ty`` and ``val`` unchanged.
        """
        # Local import keeps the file's import block untouched.
        from numba import typeof

        print(__name__, ty)  # debug trace left in place by the author
        if isinstance(val, mgl.Buffer):
            val = self._vbo_to_DeviceNDArray(val)
            # Report a Numba type, not the Python class of the array.
            return typeof(val), val
        return ty, val
@nbcuda.jit(extensions=[ModernGL_VBO_Handler(np.float32)])
def shift_02(x):
    """CUDA kernel: add 0.2 to the element of ``x`` owned by each thread.

    Args:
        x: one-dimensional device array, modified in place.
    """
    idx = nbcuda.grid(1)
    # Threads beyond the end of the array must not touch memory.
    if idx < x.size:
        x[idx] = x[idx] + 0.2
#####################################
# save fbo image into file
def save_image(fbo, name):
    """Read the framebuffer as 3-component RGB and write it to ``name``.

    Args:
        fbo: framebuffer object providing ``read(components=...)`` and ``size``.
        name: destination file name handed to ``Image.save``.
    """
    pixels = fbo.read(components=3)
    Image.frombytes("RGB", fbo.size, pixels).save(name)
##################################################################
# Pass-through shaders: they perform no computation and only forward
# the vertex position and colour along the pipeline.
# Vertex shader: takes a 2D position and an RGB colour per vertex.
vert_code = """
#version 330
in vec2 in_vert;
in vec3 in_color;
out vec3 color;
void main() {
gl_Position = vec4(in_vert, 0.0, 1.0);
color = in_color;
}
"""
# Fragment shader: writes the incoming colour with alpha fixed to 1.0.
frag_code = """
#version 330
in vec3 color;
out vec4 fragColor;
void main(){
fragColor = vec4(color, 1.0);
}
"""
# Standalone context (created with standalone=True) and the program
# built from the two shaders above.
ctx = mgl.create_context(standalone=True)
prog = ctx.program(vertex_shader=vert_code, fragment_shader=frag_code)
# Triangle vertices, one row per vertex: position (X, Y) followed by
# colour (R, G, B) -- red, green and blue corners respectively.
# The rows are flattened into the 1-D float32 layout used for the VBO.
vertices = np.asarray(
    [
        # X     Y    R    G    B
        [-0.6, -0.6, 1.0, 0.0, 0.0],
        [0.6, -0.6, 0.0, 1.0, 0.0],
        [0.0, 0.6, 0.0, 0.0, 1.0],
    ],
    dtype="f4",
).ravel()
# Create the vertex buffer (VBO) from the vertex data, a vertex array (VAO)
# binding it to the shader inputs "in_vert" and "in_color", and a framebuffer
# (FBO) backed by a 512x512 texture with 4 components; render into the FBO.
vbo = ctx.buffer(vertices)
vao = ctx.simple_vertex_array(prog, vbo, "in_vert", "in_color")
fbo = ctx.framebuffer(color_attachments=[ctx.texture((512, 512), 4)])
fbo.use()
# Rendering
# 1st render: draw the triangle as is and save it for reference.
ctx.clear()
vao.render(mgl.TRIANGLES)
save_image(fbo, "image1.jpg")
# 2nd render: modify the VBO contents with the Numba kernel first.
ctx.clear()
# Plain NumPy array, only used for the debugging experiment described below.
some_arr = np.array([1,2,3])
######################################################
# DEBUG: passing `some_arr` instead of `vbo` does reach the extension
# handler, whereas passing `vbo` is reported above to be rejected earlier.
# Launch 1 block of 2 threads: adds 0.2 to elements 0 and 1 of the buffer,
# i.e. the (x, y) coordinates of the first vertex.
shift_02[1,2](vbo) # shift coordinates (x,y) of first point
######################################################
# Wait for the kernel to finish before rendering from the buffer again.
nbcuda.synchronize()
vao.render(mgl.TRIANGLES)
save_image(fbo, "image2.jpg")