@brandonwillard, I’m thinking about how loops are represented in Numba in general, and about how involved the ufunc
loop nests (as referenced) are. The control flow graphs for the LLVM IR can be viewed with:
import numpy as np
import numba
@numba.vectorize(["float64(float64, float64)"], identity="reorderable")
def custom_op_fn(x, y):
    """Return the larger of two float64 scalars.

    The decorator compiles this scalar kernel for the single signature
    ``float64(float64, float64)`` so it can be applied element-wise to
    arrays.

    Parameters
    ----------
    x, y : float64
        The two values to compare.

    Returns
    -------
    float64
        ``x`` when ``x > y`` holds, otherwise ``y``.
    """
    # The explicit if/else is kept (rather than a conditional expression) so
    # the branch structure of the scalar kernel stays as originally written.
    if x > y:
        return x
    else:
        return y
@numba.njit(debug=True)
def max_reduce_axis_1(x):
    """Reduce ``x`` along axis 1 by repeatedly applying ``custom_op_fn``.

    Parameters
    ----------
    x : ndarray
        Input array; indexed through ``x.shape[0]`` and ``x.shape[1]``, so a
        2-D array is expected (NOTE(review): confirm no other rank is used).

    Returns
    -------
    ndarray
        1-D array of length ``x.shape[0]`` with dtype ``x.dtype``.
    """
    # Start every accumulator at -inf so any finite value replaces it.
    res = np.full((x.shape[0],), -np.inf, dtype=x.dtype)
    # For a 2-D input, row m of the transpose is column m of x.
    x_transpose = np.transpose(x)
    for m in range(x.shape[1]):
        # `res` is passed both as the first operand and as the third
        # argument, which presumably acts as the ufunc output so the running
        # result is updated in place.
        custom_op_fn(res, x_transpose[m], res)
    return res
# 5000 x 5000 array of normally distributed samples, shared by both
# implementations below.
X = np.random.normal(size=(5000, 5000))
# Axis-1 reduction using the ufunc-based implementation.
# NOTE(review): this call is presumably also what compiles the function and
# populates `signatures`, which `show_llvm_cfg` reads later -- confirm.
res_1 = max_reduce_axis_1(X)
@numba.njit(debug=True, error_model='numpy')
def max_reduce_axis_2(x):
    """Reduce ``x`` along axis 1 to its maximum using explicit nested loops.

    Parameters
    ----------
    x : ndarray
        Input array indexed as ``x[i, j]``, i.e. a 2-D array.

    Returns
    -------
    ndarray
        1-D array of length ``x.shape[0]`` with dtype ``x.dtype``; entry
        ``i`` holds the largest value found in ``x[i, :]``.
    """
    # Every slot is overwritten with -inf before it is read, so an
    # uninitialised buffer is sufficient here.
    res = np.empty((x.shape[0],), dtype=x.dtype)
    for i in range(x.shape[0]):
        res[i] = -np.inf
        for j in range(x.shape[1]):
            tmp = x[i, j]
            # Keep the running maximum for row i.
            if res[i] < tmp:
                res[i] = tmp
    return res
# Axis-1 reduction using the explicit nested-loop implementation.
res_2 = max_reduce_axis_2(X)
# Both implementations must agree with each other and with NumPy's own
# axis-1 maximum.
assert np.array_equal(res_1, res_2)
assert np.array_equal(res_1, np.max(X, axis=1))
def show_llvm_cfg(func):
    """Display the control flow graph for the first signature of ``func``.

    Parameters
    ----------
    func
        Object exposing ``signatures`` (an indexable sequence) and
        ``inspect_cfg(signature)``; here, the jitted functions defined in
        this script.

    Raises
    ------
    IndexError
        If ``func.signatures`` is empty.
    """
    # Only the first entry of `signatures` is inspected.
    cfg = func.inspect_cfg(func.signatures[0])
    # interleave/view are forwarded unchanged to the CFG object's printer.
    cfg.pretty_printer(interleave=True, view=True)
# Show the LLVM control flow graph of each implementation so the two loop
# structures can be compared.
show_llvm_cfg(max_reduce_axis_1)
show_llvm_cfg(max_reduce_axis_2)