Potential bug using np.linalg.inv with the parallel accelerator

I asked an SO question last week about obtaining different results when using plain @jit versus @jit(parallel=True), which was eventually resolved by switching to a different approach (np.linalg.solve instead of inverting the matrix to solve the matrix equation). Although the initial question used a sub-optimal/non-recommended approach, I would still expect it to work regardless of the parallel flag. What do you think?

Happy to open an issue on github if you feel it deserves a bug report.

Here’s the reprex posted with the original question (adding it here since I’m not allowed to paste link to the SO question):

import numba
import numpy as np
from sklearn.datasets import make_regression


@numba.jit(nopython=True)
def nanlstsq(X, y):
    """Least-squares solution to a linear matrix equation.

    Analog to ``numpy.linalg.lstsq`` for a dependent variable that may
    contain ``NaN`` values: rows of ``y`` that are NaN in a given column
    are dropped before solving that column's regression.

    Args:
        X ((M, N) np.ndarray): Matrix of independent variables.
        y ({(M,), (M, K)} np.ndarray): Matrix of dependent variables.

    Returns:
        np.ndarray: Least-squares solution, ignoring ``NaN``.
    """
    n_coefs = X.shape[1]
    n_series = y.shape[1]
    beta = np.zeros((n_coefs, n_series), dtype=np.float64)
    for j in range(n_series):
        # Keep only the observations where this column of y is finite.
        valid = ~np.isnan(y[:, j])
        X_v = X[valid]
        y_v = y[valid, j]
        # Normal equations: beta_j = (X'X)^-1 X'y
        gram_inv = np.linalg.inv(np.dot(X_v.T, X_v))
        moment = np.dot(X_v.T, y_v)
        beta[:, j] = np.dot(gram_inv, moment)
    return beta


@numba.jit(nopython=True, parallel=True)
def nanlstsq_parallel(X, y):
    """Parallel variant of ``nanlstsq`` using ``numba.prange``.

    Same contract as ``nanlstsq``: NaN entries in each column of ``y``
    are dropped before solving the normal equations for that column.
    Each column is an independent regression, so iterations can run in
    parallel.
    """
    beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    for j in numba.prange(y.shape[1]):
        # Mask out rows where this column of y is NaN.
        keep = ~np.isnan(y[:, j])
        X_k = X[keep]
        y_k = y[keep, j]
        # beta_j = (X'X)^-1 X'y — NOTE(review): this np.linalg.inv call
        # is the operation reported to misbehave under parallel=True.
        beta[:, j] = np.dot(np.linalg.inv(np.dot(X_k.T, X_k)),
                            np.dot(X_k.T, y_k))
    return beta




# --- Reproduction script ---
# Build a synthetic multi-output regression problem.
n_targets = 10000
n_features = 3
X, y = make_regression(n_samples=200, n_features=n_features,
                       n_targets=n_targets)
# Insert 5 * n_targets NaNs into y at random (unique) positions.
nan_positions = np.random.choice(y.size, 5*n_targets, replace=False)
y.ravel()[nan_positions] = np.nan
# Fit with the serial and the parallel implementation.
beta = nanlstsq(X, y)
beta_parallel = nanlstsq_parallel(X, y)

# Both implementations should agree within floating-point tolerance.
np.testing.assert_allclose(beta, beta_parallel)

Link here

Your example seems to work in mainline.