I didn't observe any significant difference between the two (hopefully I faithfully reproduced your code). I did a warm-up run first to factor out JIT compile time; the two numbers on each line are the minimum and the median over 100 runs.
numba.__version__='0.56.4', sys.version_info=sys.version_info(major=3, minor=9, micro=16, releaselevel='final', serial=0)
mandelbrot(): 16.617ms 16.758ms
mandelbrot_inl(): 16.628ms 17.048ms
It may or may not be related to this item, but I got slightly more intuitive results with numba 0.53.1. Tracker here.
numba.__version__='0.53.1', sys.version_info=sys.version_info(major=3, minor=9, micro=16, releaselevel='final', serial=0)
mandelbrot(): 16.467ms 16.821ms
mandelbrot_inl(): 16.396ms 16.560ms
import sys
import numba
import numpy as np, numba as nb, timeit as ti
# Sampled region of the complex plane: real axis bounds and sample count.
xmin = -2.25
xmax = 0.75
xn = 450
# Imaginary axis bounds and sample count.
ymin = -1.25
ymax = 1.25
yn = 375
# Upper bound on the number of iterations tried per point.
imax = 200
@nb.njit(fastmath=True, locals=dict(x=nb.complex64))
def abs2(x):
    """Return the squared magnitude of the complex number ``x``.

    The value is ``x.real ** 2 + x.imag ** 2``, i.e. ``|x| ** 2`` computed
    without taking a square root.
    """
    return x.real ** 2 + x.imag ** 2
@nb.njit(fastmath=True, locals=dict(c=nb.complex64))
def kernel(c):
    """Return the escape iteration count for the point ``c``.

    Starting from ``z = c``, repeatedly applies ``z = z * z + c`` and returns
    the zero-based index of the first iteration after which ``abs2(z) > 4``.
    If that never happens within ``imax`` iterations, ``imax`` is returned.

    This variant delegates the magnitude test to the ``abs2`` helper;
    ``kernel_inl`` runs the same loop with that expression written inline.
    """
    z = c
    for i in range(imax):
        z = z * z + c
        if abs2(z) > 4:
            return i
    return imax
@nb.njit(fastmath=True, locals=dict(c=nb.complex64))
def kernel_inl(c):
    """Return the escape iteration count for ``c`` using an inline test.

    Starting from ``z = c``, repeatedly applies ``z = z * z + c`` and returns
    the zero-based index of the first iteration after which
    ``z.real**2 + z.imag**2 > 4``. If that never happens within ``imax``
    iterations, ``imax`` is returned.

    This is the same loop as ``kernel`` except that the squared-magnitude
    expression is written out here instead of calling ``abs2``.
    """
    z = c
    for i in range(imax):
        z = z * z + c
        if (z.real**2 + z.imag**2) > 4:
            return i
    return imax
@nb.njit(fastmath=True)
def mandelbrot():
    """Return the grid of escape counts computed with ``kernel``.

    Allocates a ``(yn, xn)`` array of ``uint32`` zeros and stores in
    ``result[j, i]`` the value ``kernel(np.csingle(x + y * 1j))``, where
    ``y`` is the j-th sample of ``[ymin, ymax)`` and ``x`` is the i-th
    sample of ``[xmin, xmax)``.
    """
    result = np.zeros((yn, xn), dtype=np.uint32)
    # Pairing each arange with range(...) supplies the array index and caps
    # the number of samples per axis at yn / xn, whatever length the
    # float-step arange ends up with.
    for j, y in zip(range(yn), np.arange(ymin, ymax, (ymax - ymin) / yn)):
        for i, x in zip(range(xn), np.arange(xmin, xmax, (xmax - xmin) / xn)):
            result[j, i] = kernel(np.csingle(x + y * 1j))
    return result
@nb.njit(fastmath=True)
def mandelbrot_inl():
    """Return the grid of escape counts computed with ``kernel_inl``.

    Allocates a ``(yn, xn)`` array of ``uint32`` zeros and stores in
    ``result[j, i]`` the value ``kernel_inl(np.csingle(x + y * 1j))``, where
    ``y`` is the j-th sample of ``[ymin, ymax)`` and ``x`` is the i-th
    sample of ``[xmin, xmax)``.
    """
    result = np.zeros((yn, xn), dtype=np.uint32)
    # Pairing each arange with range(...) supplies the array index and caps
    # the number of samples per axis at yn / xn, whatever length the
    # float-step arange ends up with.
    for j, y in zip(range(yn), np.arange(ymin, ymax, (ymax - ymin) / yn)):
        for i, x in zip(range(xn), np.arange(xmin, xmax, (xmax - xmin) / xn)):
            result[j, i] = kernel_inl(np.csingle(x + y * 1j))
    return result
if __name__ == "__main__":
    # Statements handed to timeit; they are resolved against this module's
    # globals, so the names must match the functions defined above.
    funs = ['mandelbrot()', 'mandelbrot_inl()']

    # Warm-up: call each function once, untimed, so that JIT compilation is
    # finished before any measurement starts.
    for fun in funs:
        ti.timeit(stmt=fun, globals=globals(), number=1)

    print(f'{numba.__version__=}, {sys.version_info=}')

    # Measurement: 100 single-call timings per function, converted from
    # seconds to milliseconds. setup=fun runs the same call once, untimed,
    # before every timed call. Reported values are the minimum and median.
    for fun in funs:
        t = 1000 * np.array(ti.repeat(stmt=fun, setup=fun, globals=globals(), number=1, repeat=100))
        print(f'{fun}: {np.amin(t):6.3f}ms {np.median(t):6.3f}ms')