Working example of CUDA–OpenGL interop. This is a follow-up to my earlier question on the same topic.
The OpenGL parts are provided by the awesome moderngl and moderngl-window libraries.
Giant thanks to Graham @gmakrall!
import ctypes
import numpy as np
import numba.cuda as cuda
import cuda.cudart as curt
from cuda.cudart import cudaGraphicsRegisterFlags as GLFlags
import moderngl as mgl
import moderngl_window as mglw
from moderngl_window.conf import settings
###################################################
# wrap the raw pointer into DeviceNDArray
def GL_NDArray(ptr, buff_size, dtype=np.float32, strides=None, order="C"):
    """Wrap a raw device pointer in a Numba ``DeviceNDArray`` without copying.

    Args:
        ptr: Integer device address (it is converted with ``ctypes.c_uint64``).
        buff_size: Size of the buffer in bytes.
        dtype: Element type of the resulting array; defaults to ``np.float32``.
        strides: Forwarded to ``prepare_shape_strides_dtype``.
        order: Memory order, forwarded to ``prepare_shape_strides_dtype``.

    Returns:
        A ``DeviceNDArray`` whose element count is ``buff_size`` floor-divided
        by the item size of ``dtype``.

    No finalizer is passed to ``MemoryPointer`` here, so presumably Numba will
    not release the memory itself and the pointer's lifetime stays with the
    caller -- TODO confirm against the Numba driver documentation.
    """
    # Whole elements only: trailing bytes that do not fill an item are dropped.
    n_items = buff_size // np.dtype(dtype).itemsize
    shape, strides, dtype = cuda.api.prepare_shape_strides_dtype(
        n_items, strides, dtype, order
    )
    nbytes = cuda.driver.memory_size_from_info(shape, strides, dtype.itemsize)
    device_mem = cuda.driver.MemoryPointer(
        cuda.current_context(), ctypes.c_uint64(ptr), nbytes
    )
    return cuda.cudadrv.devicearray.DeviceNDArray(
        shape, strides, dtype, gpu_data=device_mem
    )
##################################################################
# Pass-through shaders: they apply no transformation, they only forward the
# vertex data along the pipeline.
# Vertex shader: reads a 2-component position (in_vert) and a 3-component
# color (in_color), emits the position as vec4(in_vert, 0.0, 1.0) and copies
# in_color to the `color` output.
vert_code = """
#version 330
in vec2 in_vert;
in vec3 in_color;
out vec3 color;
void main() {
gl_Position = vec4(in_vert, 0.0, 1.0);
color = in_color;
}
"""
# Fragment shader: writes the incoming `color` as the fragment color with the
# fourth (alpha) component fixed at 1.0.
frag_code = """
#version 330
in vec3 color;
out vec4 fragColor;
void main(){
fragColor = vec4(color, 1.0);
}
"""
########################################################################################
# Window config: written into moderngl_window's settings object, which
# create_window_from_settings() is called with further down.
# Use the GLFW window class.
settings.WINDOW["class"] = "moderngl_window.context.glfw.Window"
# GL version (3, 3); the shaders in this file declare `#version 330`.
settings.WINDOW["gl_version"] = (3, 3)
########################################################################################
# Initial vertex data: N records of (x, y, r, g, b) with random y values
N = 300
x = np.linspace(-1.0, 1.0, N)  # N evenly spaced values covering [-1, 1]
y = 2 * (np.random.rand(N) - 0.5)  # uniform random values in [-1, 1)
r, g, b = np.ones(N), np.zeros(N), np.zeros(N)  # constant color (1, 0, 0)
# dstack interleaves the five columns per vertex; resulting shape is (1, N, 5)
vertices = np.dstack((x, y, r, g, b))
# Convert to float32 and serialize; these bytes become the VBO contents
vert_data = vertices.astype("f4").tobytes()
########################################################################################
# Silly CUDA kernel that scales the Y coordinate by a constant factor
@cuda.jit
def shift(x):
    """Multiply every Y element of a flat vertex buffer by 0.997, in place.

    ``x`` is indexed as a 1-D array and treated as consecutive groups of five
    elements; the element at offset 1 inside each group is the one that gets
    scaled. Each thread handles at most one element, selected by its global
    1-D thread index.
    """
    idx = cuda.grid(1)
    # Valid indices are 0 .. x.size - 1, so the guard must be >=. With a plain
    # > the thread whose index equals x.size would fall through and could
    # write one element past the end of the buffer.
    if idx >= x.size:
        return
    # Per-group layout: _ Y _ _ _
    if idx % 5 == 1:
        x[idx] = x[idx] * 0.997
########################################################################################
########################################################################################
# create window. everything else is on window context
window = mglw.create_window_from_settings()
ctx = window.ctx
# Program
prog = ctx.program(vertex_shader=vert_code, fragment_shader=frag_code)
# Init → VBO VAO
vbo = ctx.buffer(vert_data)  # f4 float32
vao = ctx.simple_vertex_array(prog, vbo, "in_vert", "in_color")


def _check_cuda(err, what):
    """Raise RuntimeError if a CUDA runtime call returned anything but cudaSuccess."""
    if err != curt.cudaError_t.cudaSuccess:
        raise RuntimeError(f"{what} failed: {err!r}")


THREADS_PER_BLOCK = 1024

try:
    # Register the VBO with CUDA once; it is mapped and unmapped every frame.
    err, gl_resource = curt.cudaGraphicsGLRegisterBuffer(
        vbo.glo, GLFlags.cudaGraphicsRegisterFlagsNone
    )
    _check_cuda(err, "cudaGraphicsGLRegisterBuffer")
    try:
        # render loop
        while not window.is_closing:
            window.clear()
            # map → kernel → sync → unmap → vao render.
            # The buffer is mapped only while CUDA works on it: the CUDA
            # runtime documents graphics-API access to a mapped resource as
            # undefined, so OpenGL must not draw from it until it is unmapped.
            (err,) = curt.cudaGraphicsMapResources(1, gl_resource, 0)
            _check_cuda(err, "cudaGraphicsMapResources")
            try:
                (err, dev_ptr, dev_buff_size) = (
                    curt.cudaGraphicsResourceGetMappedPointer(gl_resource)
                )
                _check_cuda(err, "cudaGraphicsResourceGetMappedPointer")
                # Re-wrap every frame: the pointer is re-queried after each
                # map rather than assumed to stay the same.
                vbo_arr = GL_NDArray(dev_ptr, dev_buff_size)
                # Always launches at least one block; threads past the end of
                # the buffer rely on the kernel's own bounds check.
                shift[vbo_arr.size // THREADS_PER_BLOCK + 1, THREADS_PER_BLOCK](
                    vbo_arr
                )
                cuda.synchronize()
            finally:
                # Unmap even if the kernel launch failed, so the resource is
                # never left mapped.
                (err,) = curt.cudaGraphicsUnmapResources(1, gl_resource, 0)
            _check_cuda(err, "cudaGraphicsUnmapResources")
            vao.render(mgl.LINE_STRIP)
            window.swap_buffers()
    finally:
        # CUDA cleanup. Best effort: the status is deliberately not raised so
        # that it cannot mask an exception already propagating from the loop.
        curt.cudaGraphicsUnregisterResource(gl_resource)
finally:
    window.destroy()
########################################################################################