For completeness, I attempted to perform the same computation using the mpmath package which claims to provide fixed precision capabilities:
import mpmath
mpmath.mp.dps = 100 # Precision
def mpmath_cpu_func(a, b, c, d):
for i in range(a.rows):
for l in range(d[i], 0, -1):
for j in range(l):
a[i, j] = (b[i] * a[i, j] + (1.0 - b[i]) * a[i, j + 1]) / c[i]
return a[:, 0]
# Convert inputs to mpmath types with high precision
mp_a = mpmath.matrix(a.copy())
for i in range(a.shape[0]):
for j in range(a.shape[1]):
mp_a[i, j] = mpmath.mpf(a[i, j].astype(str))
mp_b = [mpmath.mpf(b[0].astype(str))]
mp_c = [mpmath.mpf(c[0].astype(str))]
mpmath_cpu_func(mp_a, mp_b, mp_c, d) # Produces 0.6449015342958763156413719663014237700252472529553791866336058829452386808983049961922292904843391669
Note that the high precision result is also different from the CPU and GPU results above.