# Faster for loop - help pls

Am looking to speed up the following function:

``````def get_groups(values: list, threshold: int) -> np.array:
x: int = 0
count: int = 1
group = []
for i in values:
x += i
group.append(count)
if x >= threshold:
count +=1
x = 0
return np.array(group)

values  = list(np.random.randint(0,100,10000))
get_groups(values, 1000)
``````

Using the `@njit()` decorator gives me a deprecation warning, and the function is even slower than in pure Python. Any pointer on how I can speed this up would be really appreciated.

Hi @steff, if you change `values` to be an array instead of a list (`values = np.random.randint(0,100,10000)`), you can get a 100x improvement.
As a general rule, it's always faster to use arrays instead of lists.

Hope it helps!

1 Like

Hi @steff,

As @luk-f-a suggests, passing arrays instead of lists really helps. When data is passed into Numba functions it has to be "unboxed" into a native form; the cost of this depends on the data type, but for lists it's quite expensive and for arrays it's cheap. It also looks like `group` is appended to on every iteration of the loop over `values`, therefore its size is known ahead of running the loop — this information can be exploited and the use of a list entirely avoided. Here's a few variants to get you started:

``````from numba import njit
import numpy as np
import time

iterations = 50

# --- pure Numpy/Python
def get_groups(values, threshold):
    """Assign a 1-based group id to each value.

    Values are accumulated in order; once the running sum reaches
    `threshold`, the current group is closed and the next value starts
    a new group. Returns an array of group ids, one per input value.
    """
    x = 0      # running sum within the current group
    count = 1  # current group id
    group = []
    for i in values:
        x += i
        group.append(count)
        if x >= threshold:  # group complete: reset and start the next one
            count += 1
            x = 0
    return np.array(group)

np.random.seed(0)
values = list(np.random.randint(0, 100, 10000))
r0 = get_groups(values, 1000)  # reference result for the checks below

ts = time.time()
for x in range(iterations):
    get_groups(values, 1000)
e0 = e = time.time() - ts  # e0 is the pure-Python baseline used in all speed-ups
print("Elapsed:", e, "speed up:", e0 / e)

# --- original

@njit
def get_groups0(values, threshold):
    # Same algorithm as get_groups, compiled with Numba but still fed a
    # Python list: unboxing/reflecting the list dominates the runtime,
    # which is why this variant is slower than pure Python.
    x = 0
    count = 1
    group = []
    for i in values:
        x += i
        group.append(count)
        if x >= threshold:
            count += 1
            x = 0
    return np.array(group)

np.random.seed(0)
values = list(np.random.randint(0, 100, 10000))
r = get_groups0(values, 1000)
np.testing.assert_allclose(r, r0)  # check result against the pure-Python baseline

ts = time.time()
for x in range(iterations):
    get_groups0(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)

# --- array input instead of list

@njit
def get_groups1(values, threshold):
    # Identical body to get_groups0 — only the call site changes: the
    # input is a NumPy array, which is cheap for Numba to unbox.
    x = 0
    count = 1
    group = []
    for i in values:
        x += i
        group.append(count)
        if x >= threshold:
            count += 1
            x = 0
    return np.array(group)

np.random.seed(0)
values = np.random.randint(0, 100, 10000)  # array input instead of list
r = get_groups1(values, 1000)
np.testing.assert_allclose(r, r0)  # check result against the pure-Python baseline

ts = time.time()
for x in range(iterations):
    get_groups1(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)

# --- array buffer instead of list

@njit
def get_groups2(values, threshold):
    # Output length equals len(values), so preallocate an array buffer
    # instead of growing a list one element at a time.
    x = 0
    count = 1
    group = np.empty_like(values)
    for ix, i in enumerate(values):
        x += i
        group[ix] = count
        if x >= threshold:
            count += 1
            x = 0
    return group

np.random.seed(0)
values = np.random.randint(0, 100, 10000)
r = get_groups2(values, 1000)
np.testing.assert_allclose(r, r0)  # check result against the pure-Python baseline

ts = time.time()
for x in range(iterations):
    get_groups2(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)

# --- simplify loop

@njit
def get_groups3(values, threshold):
    # Plain index-based while loop: the simplest form for Numba to compile.
    x = 0
    count = 1
    group = np.empty_like(values)
    ix = 0
    while ix < len(values):
        x += values[ix]
        group[ix] = count
        if x >= threshold:
            count += 1
            x = 0
        ix += 1
    return group

np.random.seed(0)
values = np.random.randint(0, 100, 10000)
r = get_groups3(values, 1000)
np.testing.assert_allclose(r, r0)  # check result against the pure-Python baseline

ts = time.time()
for x in range(iterations):
    get_groups3(values, 1000)
e = time.time() - ts
print("Elapsed:", e, "speed up:", e0 / e)
``````

gives me:

``````Elapsed: 0.2797975540161133 speed up: 1.0
Elapsed: 1.9415757656097412 speed up: 0.14410849114005314
Elapsed: 0.02059769630432129 speed up: 13.583924623522739
Elapsed: 0.001268148422241211 speed up: 220.63470577176162
Elapsed: 0.0008413791656494141 speed up: 332.54633040521395
``````

The last version being a load faster than the original. Hope this helps?

3 Likes

Thanks a lot Stuart. Much appreciated. Pure Numba magic!

It did help indeed. Thank you for the valuable hint!

@steff No problem, glad it worked ok!

Hi guys, I have another question if you don't mind. It's another for loop I would like to make faaaaster. This is what I have come up with, but it seems `np.pad` is not supported by Numba (yet):

``````# @njit(cache=True)
def clean_ticks_np(values: np.ndarray, window: int = 3, granularity: float = 0.02) -> np.ndarray:
    """Flag outlier ticks against a rolling lookback window.

    A tick is kept (True) when it lies within three sample standard
    deviations (ddof=1) of the mean of the preceding `window` values,
    plus a `granularity` tolerance.

    Returns a boolean array of length max(len(values) - window, 0): the
    first `window` ticks have no full lookback window and are not
    classified. (The original allocated empty_like(values) and left the
    tail uninitialized; it also referenced an undefined name `k` where
    the `window` parameter was meant — both fixed here.)
    """
    n = len(values)
    keep = np.empty(max(n - window, 0), dtype=np.bool_)  # one flag per classified tick
    x = window  # first index with a full lookback window behind it
    ix = 0
    while x < n:
        lookback = values[x - window:x]
        keep[ix] = np.abs(values[x] - np.mean(lookback)) < 3 * np.std(lookback, ddof=1) + granularity
        x += 1
        ix += 1
    return keep
``````

to create data:

``````from math import sqrt
from scipy.stats import norm
import numpy as np

def brownian(x0, n, dt, delta, out=None, normalise=True):
x0 = np.asarray(x0)
r = norm.rvs(size=x0.shape + (n,), scale=delta * sqrt(dt))
if out is None:
out = np.empty(r.shape)
np.cumsum(r, axis=-1, out=out)
out += np.expand_dims(x0, axis=-1)
if normalise:
out = (out - out.min())/(out.max()- out.min())
return out

# 1,000,000 samples of a normalised Brownian path, scaled into [0, 10]
n = 1000000
values = brownian(0.5, n, 10, 1000) * 10
``````

Is there some other way to speed this up?
Or do I need to code a pure python replacement for np.pad?

`np.concatenate((np.repeat(values[0], window), values))` instead of `np.pad` does the trick. Sorry to have bothered you.