Hi,
I am trying to parallelize the next code:
import numpy as np
import numba as nb
import math
# Params for array construction
shape = (4_000, 4_000)
dtype = np.float32
@nb.jit
def circle_filter(val: float, row: int, col: int, nrows: int, ncols: int) -> float:
x = (2. * row / nrows) - 1.
y = (2. * col / ncols) - 1.
if ((x ** 2 + y ** 2) <= 1) and val >= 0.5:
return 1.
return math.nan
@nb.jit
#@nb.jit(parallel=True)
def circle_fun(out, vals, nrows: int, ncols: int) -> int:
n = out.shape[0]
m = out.shape[1]
for i in range(n):
for j in range(m):
out[i, j] = circle_filter(vals[i, j], i, j, nrows, ncols)
return 0
#@nb.jit # this works!
@nb.jit(parallel=True)
def circle_fun2(out, vals, nrows: int, ncols: int) -> int:
n, m = out.shape
for i in range(n):
for j in range(m):
x = (2. * i / nrows) - 1.
y = (2. * j / ncols) - 1.
if ((x ** 2 + y ** 2) <= 1) and vals[i, j] >= 0.5:
out[i, j] = 1.
else:
out[i, j] = math.nan
return 0
def numpy_rand():
rng = np.random.default_rng()
return rng.random(shape, dtype=dtype)
rand_data = numpy_rand()
circle = np.empty(shape, dtype)
def numba_computations():
circle_fun2(circle, rand_data, *shape)
circle_fun2.parallel_diagnostics(level=4)
numba_computations()
def numpy_reductions():
area_circle = np.nansum(circle)
area_square = np.nansum(rand_data)
print(f"PI value: {4 * area_circle / area_square}")
numpy_reductions()
But when executing, I am getting this warning:
NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
After activating the parallel_diagnostics(), I am getting this output:
/Users/faltet/miniconda3/envs/iron-array-python/bin/python /Users/faltet/iarray/iron-array-python/iron-array-notebooks/blogs/numba-unable-paralellize.py
/Users/faltet/miniconda3/envs/iron-array-python/lib/python3.9/site-packages/numba/core/typed_passes.py:329: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "numba-unable-paralellize.py", line 30:
@nb.jit(parallel=True)
def circle_fun2(out, vals, nrows: int, ncols: int) -> int:
^
warnings.warn(errors.NumbaPerformanceWarning(msg,
================================================================================
Parallel Accelerator Optimizing: Function circle_fun2,
/Users/faltet/iarray/iron-array-python/iron-array-notebooks/blogs/numba-unable-
paralellize.py (29)
================================================================================
No source available
--------------------------------- Fusing loops ---------------------------------
Attempting fusion of parallel loops (combines loops with similar properties)...
----------------------------- Before Optimisation ------------------------------
--------------------------------------------------------------------------------
------------------------------ After Optimisation ------------------------------
Parallel structure is already optimal.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
---------------------------Loop invariant code motion---------------------------
Allocation hoisting:
No allocation hoisting found
Instruction hoisting:
No instruction hoisting found
--------------------------------------------------------------------------------
PI value: 3.1421794012211026
Process finished with exit code 0
which sheds not a great light to me.
Actually, I was trying to parallelize circle_fun(), which calls circle_filter(), but I was having the same problem, so this is why I went with fusion both into circle_fun2(), but the issue persists.
Any advice will be welcome!