I am attaching another bit of code:
import numpy as np
import numba
import time
from numba import njit
@njit(cache=True)
def pbc_r2(i, j, cellsize):
    """Return the squared distance between points ``i`` and ``j`` in a periodic box.

    Each Cartesian component of ``j - i`` is shifted by the nearest integer
    multiple of the matching box length before squaring, so the result is
    the squared length of the wrapped displacement.

    Parameters
    ----------
    i, j : indexable with at least three components (x, y, z)
    cellsize : array holding the box length along x, y and z

    Returns
    -------
    The sum of the three squared, wrapped components.
    """
    # Multiply by the reciprocal instead of dividing per component.
    reciprocal = 1.0 / cellsize

    dx = j[0] - i[0]
    dx -= cellsize[0] * np.rint(dx * reciprocal[0])

    dy = j[1] - i[1]
    dy -= cellsize[1] * np.rint(dy * reciprocal[1])

    dz = j[2] - i[2]
    dz -= cellsize[2] * np.rint(dz * reciprocal[2])

    return dx**2 + dy**2 + dz**2
# Compiled serially on purpose: the loop body is a few flops per row, so for
# small inputs the cost of dispatching a parallel region on every call is far
# larger than the work itself, and a parallel kernel called from another
# parallel kernel is nested parallelism, which not every Numba threading
# layer supports.
@njit('float64[:](float64[:,:], float64[:,:], float64[:])', cache=True)
def pbc_r2_vec(i, j, cellsize):
    """Return the squared periodic distance for every row pair of ``i`` and ``j``.

    Row ``k`` of the result is the squared length of ``j[k] - i[k]`` after
    each of its first three components has been shifted by the nearest
    integer multiple of the matching entry of ``cellsize``.

    Parameters
    ----------
    i, j : float64[:, :]
        Coordinate arrays of identical shape; columns 0, 1, 2 are read.
    cellsize : float64[:]
        Box length along each of the three axes.

    Returns
    -------
    float64[:]
        One squared distance per row.

    Raises
    ------
    AssertionError
        If ``i`` and ``j`` do not have the same shape.
    """
    # Compare both dimensions: equal ``.size`` alone would accept e.g. (2, 3)
    # against (3, 2) and then index out of bounds below.
    assert i.shape[0] == j.shape[0] and i.shape[1] == j.shape[1]

    n_pairs = i.shape[0]
    # Every element is written in the loop, so no zero-fill is needed.
    r2 = np.empty(n_pairs, dtype=np.float64)
    inv_cellsize = 1.0 / cellsize
    for idx in range(n_pairs):
        xdist = j[idx, 0] - i[idx, 0]
        ydist = j[idx, 1] - i[idx, 1]
        zdist = j[idx, 2] - i[idx, 2]
        xk = xdist - cellsize[0] * np.rint(xdist * inv_cellsize[0])
        yk = ydist - cellsize[1] * np.rint(ydist * inv_cellsize[1])
        zk = zdist - cellsize[2] * np.rint(zdist * inv_cellsize[2])
        r2[idx] = xk**2 + yk**2 + zk**2
    return r2
# To run the outer loop in parallel, add ``parallel=True`` to the decorator
# below. Without it, ``numba.prange`` behaves exactly like ``range``.
@njit('float64[:,:](float64[:,:], float64[:,:], float64[:])', cache=True)
def myfunc(mol1Coord, mol2Coord, cellsize):
    """Compute five squared periodic distances per three-row group of ``mol2Coord``.

    ``mol2Coord`` is processed in consecutive groups of three rows. For
    group ``j`` (rows ``3*j``, ``3*j+1``, ``3*j+2``) the output row holds
    the squared periodic distances of these point pairs, in order:

    0. ``mol1Coord[0]`` and ``mol2Coord[3*j]``
    1. ``mol1Coord[0]`` and ``mol2Coord[3*j+1]``
    2. ``mol1Coord[0]`` and ``mol2Coord[3*j+2]``
    3. ``mol2Coord[3*j]`` and ``mol1Coord[1]``
    4. ``mol2Coord[3*j]`` and ``mol1Coord[2]``

    Parameters
    ----------
    mol1Coord : float64[:, :]
        Reference coordinates; rows 0, 1 and 2 are read.
    mol2Coord : float64[:, :]
        Coordinates processed three rows at a time; trailing rows that do
        not fill a complete group are ignored.
    cellsize : float64[:]
        Box length along each of the three axes.

    Returns
    -------
    float64[:, :]
        Array of shape ``(mol2Coord.shape[0] // 3, 5)``.
    """
    n_groups = mol2Coord.shape[0] // 3
    r2Vec = np.zeros((n_groups, 5), dtype=np.float64)
    for j in numba.prange(n_groups):
        # Scratch buffers are allocated per iteration so that iterations
        # stay independent if the loop is ever run in parallel. All five
        # rows of each buffer are assigned below, so no zero-fill is needed.
        rIVec = np.empty((5, 3), dtype=np.float64)
        rJVec = np.empty((5, 3), dtype=np.float64)

        rIVec[0] = mol1Coord[0]
        rIVec[1] = mol1Coord[0]
        rIVec[2] = mol1Coord[0]
        rIVec[3] = mol2Coord[3 * j]
        rIVec[4] = mol2Coord[3 * j]

        rJVec[0] = mol2Coord[3 * j]
        rJVec[1] = mol2Coord[3 * j + 1]
        rJVec[2] = mol2Coord[3 * j + 2]
        rJVec[3] = mol1Coord[1]
        rJVec[4] = mol1Coord[2]

        r2Vec[j] = pbc_r2_vec(rIVec, rJVec, cellsize)
    return r2Vec
# --- Benchmark driver -------------------------------------------------------
# Sample coordinates: one three-row reference group and five three-row groups.
mol1Coord = np.array([[-3.51539114, -0.54503553, 13.00695019],
                      [-3.73028985, -1.42658899, 12.47799689],
                      [-4.06549919, -0.53207096, 13.91130745]])
mol2Coord = np.array([[-1.08085597, -2.37911769, 13.22645743],
                      [-0.42385161, -2.1416567, 13.96915362],
                      [-1.90155566, -1.82187172, 13.16176887],
                      [-3.76115629, -2.90836846, 11.50626325],
                      [-3.38328776, -3.81238934, 11.42368819],
                      [-4.46727316, -2.84933116, 10.74846771],
                      [-1.81306444, -1.03291112, 10.46160559],
                      [-1.97258684, -0.25140428, 11.11214725],
                      [-2.46598435, -1.79344582, 10.6389096],
                      [-5.13711051, -0.30008118, 15.31035477],
                      [-5.36262698, -0.45602657, 16.30489416],
                      [-6.05201763, 0.01835542, 14.87068595],
                      [-1.86112701, 1.45820731, 12.22619147],
                      [-2.45066116, 1.78841197, 11.49926148],
                      [-2.4930514, 0.87405741, 12.82700715]])
# Box lengths along x, y, z.
cellsize = np.array([26.40, 26.40, 70])

timesave = []
iterations = 1000

# One untimed call up front; it also prints the result for inspection.
print(myfunc(mol1Coord, mol2Coord, cellsize))
print("myfunc => ", myfunc.nopython_signatures)
print("pbc_r2_vec => ", pbc_r2_vec.nopython_signatures)

for _ in range(iterations):
    start = time.perf_counter()
    myfunc(mol1Coord, mol2Coord, cellsize)
    timesave.append(time.perf_counter() - start)

# Average over the timed runs, discarding the first sample. Slicing with
# ``[:1]`` instead would report that single first sample, not a mean.
print("Took {:.3g} ms".format(np.mean(timesave[1:]) * 1000.0))
Currently, `myfunc(mol1Coord, mol2Coord, cellsize)`
takes about 1.2 ms per execution, while with `parallel=True`
it takes 0.335 ms. I had initially thought that calling the distance calculation `pbc_r2`
five times inside the function might be contributing to the overhead, so I tried combining all the distance calculations into a single function, but this gave no speedup.