Here is my function, accelerated with Numba. One of its parameters is a 3D array. Is there any way to make it faster?
# Benchmark inputs: two real 256x256 planes and a stack of four complex planes.
a = np.ones((256, 256), dtype='float32')
b = np.full(a.shape, 3, dtype=a.dtype)  # every entry is 3, same dtype as `a`
c = (
    np.random.rand(4, 256, 256) + 1j * np.random.rand(4, 256, 256)
).astype('complex64')

# Explicit Numba signatures handed to `njit`: each pairs a real precision
# (shared by the two 2D arguments) with a complex precision for the 3D one.
sig = [
    'void(float32[:,:],float32[:,:],complex64[:,:,:])',
    'void(float32[:,:],float32[:,:],complex128[:,:,:])',
    'void(float64[:,:],float64[:,:],complex64[:,:,:])',
    'void(float64[:,:],float64[:,:],complex128[:,:,:])',
]
@njit(sig, parallel=True, boundscheck=False)
def f(amplitude, denom, wave):
    """Rescale ``wave`` in place, element by element.

    Every ``wave[i, j, k]`` is multiplied by ``amplitude[j, k]`` and divided
    by ``sqrt(denom[j, k]) + eps``. The same 2D ``amplitude`` and ``denom``
    entries are reused for each index ``i`` along the first axis of ``wave``.

    The function is decorated with the explicit signatures listed in ``sig``
    and iterates the first axis of ``wave`` with ``nb.prange``.

    Args:
        amplitude: 2D array indexed as ``[j, k]``.
        denom: 2D array indexed as ``[j, k]``; its square root forms the divisor.
        wave: 3D array indexed as ``[i, j, k]``; overwritten with the result.

    Returns:
        None. ``wave`` is modified in place.
    """
    # Small constant added to the square root in the divisor
    # (presumably to avoid dividing by exactly zero).
    eps = 2.2204e-16
    n_planes, n_rows, n_cols = wave.shape
    for plane in nb.prange(n_planes):
        for row in range(n_rows):
            for col in range(n_cols):
                divisor = np.sqrt(denom[row, col]) + eps
                wave[plane, row, col] = (
                    wave[plane, row, col] * amplitude[row, col] / divisor
                )
@njit(parallel=True, boundscheck=False)
def f1(amplitude, denom, wave):
    """Rescale ``wave`` in place, element by element.

    Every ``wave[i, j, k]`` is multiplied by ``amplitude[j, k]`` and divided
    by ``sqrt(denom[j, k]) + eps``. The same 2D ``amplitude`` and ``denom``
    entries are reused for each index ``i`` along the first axis of ``wave``.

    No explicit signature is passed to ``njit`` here; the first axis of
    ``wave`` is iterated with ``nb.prange``.

    Args:
        amplitude: 2D array indexed as ``[j, k]``.
        denom: 2D array indexed as ``[j, k]``; its square root forms the divisor.
        wave: 3D array indexed as ``[i, j, k]``; overwritten with the result.

    Returns:
        None. ``wave`` is modified in place.
    """
    # Small constant added to the square root in the divisor
    # (presumably to avoid dividing by exactly zero).
    eps = 2.2204e-16
    depth, height, width = wave.shape
    for z in nb.prange(depth):
        for y in range(height):
            for x in range(width):
                divisor = np.sqrt(denom[y, x]) + eps
                wave[z, y, x] = wave[z, y, x] * amplitude[y, x] / divisor
%%time
f(a,b,c)
Wall time: 5.95 ms
%%time
f1(a,b,c)
Wall time: 5.95 ms