Hi,
I’m new to the community. Two weeks ago I started teaching myself Numba to speed up the Python program I am writing for my thesis. Even with Numba, the program still takes 2-3 minutes to run, so I wanted to ask whether someone could help me improve its performance. I’m sure there are some tricks I could use, or maybe I’m using something incorrectly in a way that hurts performance.
I’m posting all of my code below; feel free to run it. The code is complete and self-contained: nothing it needs has been left out.
I intentionally generate the arrays inside each simulation run. If I created everything once beforehand, as I did before, the 3D arrays would require about 16 GB of memory for 2000 simulations alone, and with 5000 simulations my memory would not even be enough. That’s why I generate the arrays at runtime of the simulation, which makes more use of the CPU but saves memory.
As the saying goes: memory costs, but time does not
Code:
import matplotlib.pyplot as plt
import numpy as np
from numba import njit, prange
import time as time
# Wall-clock start; the elapsed time is printed once the simulation has finished.
t = time.time()

# --- Simulation parameters -------------------------------------------------
# Number of neurons in each network.
N = 1000
# Number of independent runs per (temperature, pattern count) combination.
S = 2000
# Number of time steps allocated for every run.
max_iteration = 30
# Pattern counts to simulate: 40, 60, 80, 100, 120.
pattern_nums = np.arange(40, 121, 20, dtype=np.uint8)
# Temperatures to simulate.
T = np.asarray([0.01, 0.3], dtype=np.float32)
# Coupling constants for the whole set of patterns.
@njit(fastmath=True, nogil=True)
def add_coupling(patterns, N):
    """Build the coupling matrix from the stored patterns.

    Computes ``patterns.T @ patterns / N``: entry ``(i, j)`` is the sum over
    all patterns of the product of their ``i``-th and ``j``-th components,
    divided by ``N``.

    Args:
        patterns: 2-D array with one pattern per row.
        N: Divisor applied to the product (the caller passes the number of
            neurons).

    Returns:
        Square 2-D array whose side equals the number of columns of
        ``patterns``.
    """
    # Materialise the transpose as a C-contiguous array before multiplying.
    transposed = np.ascontiguousarray(patterns.T)
    return np.dot(transposed, patterns) / N
# Generate max_p random patterns and a noisy copy of the first one as start spins.
@njit(fastmath=True, nogil=True)
def init_neurons(max_p, N):
    """Create random patterns and an initial spin configuration.

    ``max_p`` patterns of ``N`` entries are drawn uniformly from ``{-1, +1}``.
    The initial spins are a copy of the first pattern in which a random subset
    of entries keeps its sign and every other entry is flipped.

    Args:
        max_p: Number of patterns to generate.
        N: Number of neurons (length of every pattern and of the spin vector).

    Returns:
        Tuple ``(spins, patterns)``: ``spins`` is a float32 array of length
        ``N``; ``patterns`` is a float32 array of shape ``(max_p, N)``.
    """
    patterns = np.random.choice(np.array([-1, 1], dtype=np.float32), size=(max_p, N))

    # How many spins stay aligned with pattern 0: an integer in
    # [N // 2 + 1, int(N * 0.975)).  Integer bounds give the same range as
    # truncating the float bounds would.
    n_aligned = np.random.randint(N // 2 + 1, int(N * 0.975))

    # Draw the aligned positions from the valid indices 0 .. N-1 only.
    # Sampling from arange(N + 1) could select the non-existent index N and
    # silently leave one spin fewer aligned than requested.
    aligned = np.random.choice(np.arange(N), size=n_aligned, replace=False)

    # Start fully anti-aligned, then restore the selected positions.  This is
    # a single O(N) pass instead of an O(N * n_aligned) membership scan.
    spins = -patterns[0]
    for idx in aligned:
        spins[idx] = patterns[0, idx]
    return spins, patterns
@njit(fastmath=True, parallel=True, nogil=True)
def simulate(T, N, S, max_iteration, max_patterns):
    """Run ``S`` independent simulations per temperature and pattern count.

    For every temperature in ``T`` and every pattern count in ``max_patterns``
    a fresh network is generated ``S`` times, iterated with ``iterate`` and the
    outcome (1 or 0) is recorded in a histogram bin chosen from the initial
    overlap ``M0`` with the first pattern.

    Args:
        T: 1-D array of temperatures; ``B = 1 / t`` is used for each entry.
        N: Number of neurons per network.
        S: Number of independent runs per (temperature, pattern count) pair.
        max_iteration: Number of time steps allocated for each run.
        max_patterns: 1-D array with the pattern counts to simulate.

    Returns:
        float64 array of shape ``(len(T), len(max_patterns), 20)``.  Entry
        ``[r, j, k]`` is the mean of the ``iterate`` results of all runs that
        fell into bin ``k``; bins that received no run are 0.
    """
    n_bins = 20
    result = np.zeros((len(T), len(max_patterns), n_bins), dtype=np.float64)
    for r, t in enumerate(T):
        B = 1 / t
        for j, n in enumerate(max_patterns):
            a = n / N

            # Per-run outputs.  Every parallel iteration writes only to its own
            # slot i, so no two threads ever touch the same element.  Updating
            # shared histogram bins with "+=" inside prange would be a race.
            keys = np.zeros(S, dtype=np.int64)
            outcomes = np.zeros(S, dtype=np.float32)

            for i in prange(S):
                spins, patterns = init_neurons(n, N)
                J = add_coupling(patterns, N)
                M = np.zeros((max_iteration, N), dtype=np.float32)
                H = np.zeros((max_iteration, N), dtype=np.float32)
                u = np.zeros(max_iteration, dtype=np.float32)
                H[0] = (8 / B) * spins
                M[0] = np.tanh(B * H[0, :])
                H[1] = (8 / B) * spins
                M0 = np.sum(patterns[0] * np.tanh(B * H[0])) / N
                # Calculate u for theta 0
                u[0] = B * (1 - M0) if B < 10 else ((B ** (a)) * M0)
                outcomes[i] = iterate(H, J, M, a, B, u)
                # Clamp the bin index: array accesses are not bounds-checked
                # here, so an out-of-range key must never reach the histogram.
                keys[i] = min(max(round(M0 * n_bins), 0), n_bins - 1)

            # Serial reduction of the per-run outputs into the histogram.
            values = np.zeros(n_bins, dtype=np.float32)
            count = np.zeros(n_bins, dtype=np.float32)
            for i in range(S):
                values[keys[i]] += outcomes[i]
                count[keys[i]] += 1

            # Empty bins keep the 0 that result was initialised with.
            for k in range(n_bins):
                if count[k] > 0:
                    result[r, j, k] = values[k] / count[k]
    return result
@njit(fastmath=True, error_model="numpy", nogil=True)
def iterate(H, J, M, a, B, u):
    """Advance the fields ``H`` step by step until they stop changing.

    ``H``, ``M`` and ``u`` are updated in place.  Rows 0 and 1 of ``H``, row 0
    of ``M`` and ``u[0]`` must already be filled in by the caller.

    Args:
        H: 2-D array ``(time steps, neurons)`` of fields.
        J: Square coupling matrix ``(neurons, neurons)``.
        M: 2-D array ``(time steps, neurons)``; row ``t`` is set to
            ``tanh(B * H[t])``.
        a: Scalar used as exponent of ``B`` and as weight of the ``M`` terms
            (the caller passes pattern count divided by neuron count).
        B: Scalar multiplying ``H`` inside the ``tanh`` (the caller passes
            ``1 / temperature``).
        u: 1-D array with one entry per time step.

    Returns:
        1 if the mean absolute change between two consecutive rows of ``H``
        drops below ``1e-6`` before the time steps run out, otherwise 0.
    """
    N = len(J)  # Size of Neurons N
    L = len(H)  # max iterations
    B_pow_a = B ** a  # does not depend on t, so compute it once
    for t in range(1, L - 1):
        M[t] = np.tanh(B * H[t, :])
        if B >= 10:
            q = np.absolute(np.sum(M[t])) / N
            u[t] = B_pow_a * q
        else:
            q = np.sum(M[t] ** 2) / N
            u[t] = B * (1 - q)
        x = (a * u[t]) / (1 - u[t - 1])
        # One matrix-vector product for all neurons instead of N separate row
        # dot products; the remaining terms are applied element-wise.
        H[t + 1] = (J @ M[t]) - (a * M[t]) - (u[t] * H[t]) - x * M[t - 1]
        H[t + 1, :] /= 1 - u[t]
        if np.mean(np.absolute(H[t + 1, :] - H[t, :])) < 0.000001:
            return 1
    return 0
# Run every simulation; the first axis of the result follows the order of T.
result = simulate(T, N, S, max_iteration, pattern_nums)
Y1, Y2 = result[0], result[1]
print(
    f"Generated {len(pattern_nums)} Hopfield Models and iterated each with T1 = 0.01 and T2 = 0.3 in {time.time() - t}s"
)

# One panel per temperature, one curve per pattern count.
x_axis = np.arange(0, 1, 0.05)
for panel, curves in enumerate((Y1, Y2), start=1):
    plt.subplot(1, 2, panel)
    for label, curve in zip(pattern_nums, curves):
        plt.plot(x_axis, curve, label=label)
    plt.legend()
    plt.xlim([0, 1])
    plt.ylim([0, 1])
plt.show()
Thanks for your help.
- SACDevOps