Migrating my question from github
I’m trying to find the complex powers using numba cuda, and it seems that it has been supported, but for some reason, my code doesn’t run. I’ve implemented my algorithm in CUDA and CPU, Ideally the plots should be the same. I’ve tried this with real numbers and things do work. Open to any suggestions
import numpy as np
import matplotlib.pyplot as plt
import time, math
from matplotlib.colors import hsv_to_rgb
from numba import cuda, jit, prange
@cuda.jit
def get_grid_cuda(Z):
i, j = cuda.grid(2)
if i >=0 and i < Z.shape[0] and j >=0 and j < Z.shape[1]:
for i in range(200):
fx=Z[i,j]**3+Z[i,j]**2+Z[i,j]+1
dfx=3*Z[i,j]**2+2*Z[i,j]+1
div=fx/dfx
Z[i,j]=Z[i,j]-div
if(abs(div)<1e-3):
break
@jit(nopython=True)
def NewtonR(x):
for i in range(100):
fx=x**3+x**2+x+1
dfx=3*x**2+2*x+1
div=fx/dfx
x=x-div
if(abs(div)<1e-3):
break
return x
@jit(nopython=True, parallel=True)
def get_grid(Z):
for i in prange(len(Z)):
for j in prange(len(Z[0])):
Z[i,j]=NewtonR(Z[i,j])
return Z
x=np.linspace(-1,1,512,dtype=complex)
xj=np.linspace(-1j,1j,512,dtype=complex)
xx,yy=np.meshgrid(x,xj)
X=xx+yy
Z=np.copy(X)
Z_cpu=np.copy(X)
threadsperblock = (16, 16)
blockspergrid_x = math.ceil(Z.shape[0] / threadsperblock[0])
blockspergrid_y = math.ceil(Z.shape[1] / threadsperblock[1])
blockspergrid = (blockspergrid_x, blockspergrid_y)
get_grid_cuda[blockspergrid, threadsperblock](Z)
Z_cpu=get_grid(Z_cpu)
plt.contourf(X.real,X.imag,Z,levels=30)
plt.show()
plt.close()
plt.contourf(X.real,X.imag,Z_cpu,levels=30)
plt.show()
plt.close()