Hey there,
as Numba sadly doesn’t support np.nanmin
or np.nanmax
for parallelization I (and someone else) tried to do it ourselves:
Code
import numpy as np
import numba as nb
from math import isnan, inf
@nb.njit(fastmath=True)
def _minmax_nan(x):
maximum = -inf
minimum = inf
for i in x:
if not isnan(i):
if i > maximum:
maximum = i
if i < minimum:
minimum = i
return minimum, maximum
@nb.njit(parallel=True)
def _minmax_chunks_nan(x, chunk_ranges):
overall_maxima = []
overall_minima = []
for i in nb.prange(chunk_ranges.shape[0]):
start = chunk_ranges[i, 0]
end = chunk_ranges[i, 1]
chunk_minimum, chunk_maximum = _minmax_nan(x[start : end])
overall_maxima.append(chunk_maximum)
overall_minima.append(chunk_minimum)
return min(overall_minima), max(overall_maxima)
def even_chunk_sizes(dividend, divisor):
quotient, remainder = divmod(dividend, divisor)
cells = [quotient for _ in range(divisor)]
for i in range(remainder):
cells[i] += 1
return cells
def even_chunk_ranges(dividend, divisor):
sizes = even_chunk_sizes(dividend, divisor)
ranges = []
start = 0
for s in sizes:
end = start + s
ranges.append((start, end))
start = end
return ranges
def nanminmax_parallel(x, n_chunks):
chunk_ranges = np.array([
[start, end]
for start, end
in even_chunk_ranges(len(x), n_chunks)
], dtype=np.int64)
return _minmax_chunks_nan(x, chunk_ranges)
Doing things like this in the jupyter notebook is a sure way to kill the kernel:
arr = np.random.rand(10)
%timeit nanminmax_parallel(arr, 4)
Just calling the function in quick succession seems to cause a crash.
Can someone help with this issue? Why does it crash, seemingly by chance?
Also, I’m pretty new to numba+JIT so any suggestions to improve this piece of code would be much appreciated.
Best regards
PS: relevant github with ipynb+binder: GitHub - rynkk/misc-ipynbs