Hi @steff,
As @luk-f-a suggests, passing arrays instead of lists really helps. When data is passed into Numba functions it has to be “unboxed” into a native form, the cost of which depends on the data type but for lists it’s quite expensive and for arrays it’s cheap. It also looks like group
is appended to for every iteration of the loop in values
, therefore it’s size is known ahead of running the loop, this information can be exploited and the use of a list entirely avoided. Here’s a few variants to get you started:
from numba import njit
import numpy as np
import time
iterations = 50
# --- pure Numpy/Python
def get_groups(values, threshold):
x = 0
count = 1
group = []
for i in values:
x += i
group.append(count)
if x >= threshold:
count +=1
x = 0
return np.array(group)
np.random.seed(0)
values = list(np.random.randint(0,100,10000))
r0 = get_groups(values, 1000)
ts = time.time()
for x in range(iterations):
get_groups(values, 1000)
e0 = e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)
# --- original
@njit
def get_groups0(values, threshold):
x = 0
count = 1
group = []
for i in values:
x += i
group.append(count)
if x >= threshold:
count +=1
x = 0
return np.array(group)
np.random.seed(0)
values = list(np.random.randint(0,100,10000))
r = get_groups0(values, 1000)
np.testing.assert_allclose(r, r0) # check result
ts = time.time()
for x in range(iterations):
get_groups0(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)
# --- array input instead of list
@njit
def get_groups1(values, threshold):
x = 0
count = 1
group = []
for i in values:
x += i
group.append(count)
if x >= threshold:
count +=1
x = 0
return np.array(group)
np.random.seed(0)
values = np.random.randint(0,100,10000)
r = get_groups1(values, 1000)
np.testing.assert_allclose(r, r0) # check result
ts = time.time()
for x in range(iterations):
get_groups1(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)
# --- array buffer instead of list
@njit
def get_groups2(values, threshold):
x = 0
count = 1
group = np.empty_like(values)
for ix, i in enumerate(values):
x += i
group[ix] = count
if x >= threshold:
count +=1
x = 0
return group
np.random.seed(0)
values = np.random.randint(0,100,10000)
r = get_groups2(values, 1000)
np.testing.assert_allclose(r, r0) # check result
ts = time.time()
for x in range(iterations):
get_groups2(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)
# --- simplify loop
@njit
def get_groups3(values, threshold):
x = 0
count = 1
group = np.empty_like(values)
ix = 0
while(ix < len(values)):
x += values[ix]
group[ix] = count
if x >= threshold:
count +=1
x = 0
ix += 1
return group
np.random.seed(0)
values = np.random.randint(0,100,10000)
r = get_groups3(values, 1000)
np.testing.assert_allclose(r, r0) # check result
ts = time.time()
for x in range(iterations):
get_groups3(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)
gives me:
Elapsed: 0.2797975540161133 speed up: 1.0
Elapsed: 1.9415757656097412 speed up: 0.14410849114005314
Elapsed: 0.02059769630432129 speed up: 13.583924623522739
Elapsed: 0.001268148422241211 speed up: 220.63470577176162
Elapsed: 0.0008413791656494141 speed up: 332.54633040521395
The last version being a load faster than the original. Hope this helps?