We want to calculate the second virial coefficient for a system of hard spherocylinders using Monte Carlo integration. Our aim is to compare four different methods for efficiency, namely: (1) plain Python loops, (2) vectorized NumPy operations, (3) an external Fortran program, and (4) a Fortran routine compiled with f2py and called directly from Python.
We start with the first method, using explicit loops in Python, coding it up much as we would in C or Fortran.
First, we import some libraries/modules
import numpy as np #we always need arrays
import pandas as pd #we want to put our results in a table
import matplotlib.pyplot as plt #and we wouldn't mind plotting them
from scipy.optimize import brentq #we will need to find the root of a function
import time #and of course, we want to time everything
We now need to define a few functions. The following two are used to find the value of the parameter alpha of the Onsager trial function that corresponds to a given nematic order parameter S2.
def nematic_orderparam(alpha, S2):
    """Nematic order parameter of the Onsager trial function for a given alpha, minus the target value S2 (so the root in alpha corresponds to that S2)"""
    return 1.0 + 3.0/(alpha*alpha) - 3.0/(alpha*np.tanh(alpha)) - S2

def find_alpha(S2):
    """Find the alpha that gives a specified nematic order parameter S2"""
    return brentq(nematic_orderparam, 1e-10, 2e3, args=(S2,))
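As a quick sanity check (the value S2 = 0.8 used here is just an arbitrary test value), we can confirm that the alpha returned by find_alpha reproduces the requested order parameter:
alpha_test = find_alpha(0.8)                               # arbitrary test value of S2
print(alpha_test, nematic_orderparam(alpha_test, 0.0))     # second number should be very close to 0.8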
Now we define three functions that generate a unit orientation vector for three different cases: the isotropic phase, a perfectly aligned nematic phase, and the Onsager trial function.
def iso():
    """Generate a unit vector for a particle in the isotropic phase"""
    u = np.empty(3)
    u[2] = np.random.uniform()       # cos(theta) uniform on [0,1): upper hemisphere only, sufficient for head-tail symmetric particles
    x = twopi*np.random.uniform()    # azimuthal angle
    fac = np.sqrt(1.0-u[2]*u[2])
    u[0] = fac*np.cos(x)
    u[1] = fac*np.sin(x)
    return u
def nem():
    """Generate a unit vector for a particle in a perfectly aligned nematic phase"""
    u = np.zeros(3)
    u[2] = 1.0
    return u
def onsager(S2, alpha, sh_alpha):
    """Generate a unit vector from the Onsager trial function for a given nematic order parameter S2"""
    u = np.empty(3)
    x = np.random.uniform()*sh_alpha
    u[2] = np.log(x + np.sqrt(1.0 + x*x))/alpha    # cos(theta) = arcsinh(x)/alpha, distributed as cosh(alpha*cos(theta))
    x = twopi*np.random.uniform()                  # azimuthal angle
    fac = np.sqrt(1.0-u[2]*u[2])
    u[0] = fac*np.cos(x)
    u[1] = fac*np.sin(x)
    return u
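As an optional check of the Onsager sampler (the target value S2 = 0.6 and the 100,000 samples below are arbitrary choices for this test), we can estimate the order parameter S2 = <(3 cos^2 theta - 1)/2> from the sampled vectors and compare it with the target:
twopi = 2.0*np.pi                        # also set further down; repeated here so this check is self-contained
S2_target = 0.6                          # arbitrary test value
alpha = find_alpha(S2_target)
u_sample = np.array([onsager(S2_target, alpha, np.sinh(alpha)) for _ in range(100000)])
print(S2_target, np.mean(1.5*u_sample[:, 2]**2 - 0.5))   # should agree to within sampling error (third decimal place or so)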
And we also define a function that tests for overlap between two spherocylinders. It finds the closest approach between the two axis line segments (of half-length xld2) and reports an overlap if that distance is less than the diameter D = 1.
def overlap(sep, ui, uj):
    """Test for overlap between two spherocylinders: returns True if they overlap"""
    over = False
    rui = np.sum(sep*ui)
    ruj = np.sum(sep*uj)
    uij = np.sum(ui*uj)
    sinsq = 1.0 - uij*uij
    if sinsq < 1e-14:
        # (nearly) parallel axes: choose points whose separation is perpendicular to the axes
        ci = -rui*0.5
        cj = ruj*0.5
    else:
        # closest points on the two infinite axis lines
        ci = (-rui+uij*ruj)/sinsq
        cj = (ruj-uij*rui)/sinsq
    # clamp the closest points onto the finite segments of half-length xld2
    if np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2:
        if np.absolute(ci) >= np.absolute(cj):
            ci = xld2*np.sign(ci)
            cj = ruj + ci*uij
        else:
            cj = xld2*np.sign(cj)
            ci = -rui + cj*uij
        if np.absolute(ci) > xld2:
            ci = xld2*np.sign(ci)
        if np.absolute(cj) > xld2:
            cj = xld2*np.sign(cj)
    di = 2.0*rui + ci - cj*uij
    dj = -2.0*ruj + cj - ci*uij
    sepsq = np.sum(sep*sep)
    # sepsq + ci*di + cj*dj is the squared distance of closest approach of the two segments
    if sepsq + ci*di + cj*dj < 1.0:
        over = True
    return over
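As a minimal sanity check (the configurations below are constructed by hand), two parallel spherocylinders whose axes are 0.5 D apart must overlap, while at 1.5 D they must not. Note that overlap() relies on the global xld2, which is set to 2.5 further down; we set it here too so the check is self-contained.
xld2 = 2.5                                                  # L/(2D) for L/D = 5; also set further down
u_par = np.array([0.0, 0.0, 1.0])                           # both particles along z
print(overlap(np.array([0.5, 0.0, 0.0]), u_par, u_par))     # True: axes 0.5 D apart
print(overlap(np.array([1.5, 0.0, 0.0]), u_par, u_par))     # False: axes 1.5 D apart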
And finally we need a function that calculates the value of the second virial coefficient itself.
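For hard particles the Mayer function f(1,2) equals -1 when the two particles overlap and 0 otherwise, so the second virial coefficient reduces to half the orientationally averaged excluded volume,

$$B_2 = -\tfrac{1}{2}\int \langle f(1,2)\rangle\,\mathrm{d}\mathbf{r}_{12} = \tfrac{1}{2}\,(2 r_{\max})^3\,\langle P_{\rm overlap}\rangle = 4\,r_{\max}^3\,\langle P_{\rm overlap}\rangle ,$$

where P_overlap is the probability that two particles overlap when their centre-centre separation is drawn uniformly from a cube of side 2*rmax and their orientations from the chosen distribution. This is where the prefactor fac = 4.0*rmax*rmax*rmax in the function below comes from; rmax = 2*xld2 + 1 = L + D is the largest centre-centre distance at which two spherocylinders can still overlap.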
def onsager_B2(n_batch, n_trial, n_points):
    """Calculate the second virial coefficient using n_batch*n_trial configurations, for each of n_points order parameters"""
    rmax = 2.0*xld2 + 1.0       # largest centre-centre distance at which two particles can overlap
    fac = 4.0*rmax*rmax*rmax    # half the volume of the sampling cube of side 2*rmax
    r1 = np.zeros(3)
    u1 = np.zeros(3)
    u2 = np.zeros(3)
    output = {}
    output['S2'] = np.zeros(n_points)
    output['B2'] = np.zeros(n_points)
    output['error'] = np.zeros(n_points)
    for i_s in range(n_points):
        S2 = i_s/(n_points-1)
        if S2 < 1.0 and S2 > 0.0:
            alpha = find_alpha(S2)
            sh_alpha = np.sinh(alpha)
        B2 = 0.0
        B2sq = 0.0
        error = 0.0
        for ib in range(n_batch):
            B2_sub = 0.0
            n_over = 0.0
            for it in range(n_trial):
                r2 = np.random.uniform(-rmax, rmax, 3)
                if S2 < 1e-10:
                    u1 = iso()
                    u2 = iso()
                elif S2 > 1.0-1e-10:
                    u1 = nem()
                    u2 = nem()
                else:
                    u1 = onsager(S2, alpha, sh_alpha)
                    u2 = onsager(S2, alpha, sh_alpha)
                if overlap(r2-r1, u1, u2):
                    n_over += 1.0
            B2_sub = n_over/n_trial     # overlap fraction for this batch
            B2 += B2_sub
            B2sq += B2_sub*B2_sub
        B2 = B2/n_batch
        B2sq = B2sq/n_batch
        error = np.sqrt((B2sq-B2*B2)/(n_batch-1))   # standard error of the mean over batches
        output['S2'][i_s] = S2
        output['B2'][i_s] = B2*fac
        output['error'][i_s] = error*fac
        print(output['S2'][i_s], output['B2'][i_s], output['error'][i_s])
    return output
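The error bar is obtained by batching: each batch gives an independent estimate of the overlap fraction, and the quoted error is the standard error of the mean of those batch estimates,

$$\sigma \approx \sqrt{\frac{\overline{B_{2,b}^{\,2}} - \overline{B_{2,b}}^{\,2}}{n_{\rm batch}-1}},$$

which is what the lines involving B2sq compute, before the final scaling by fac.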
And now we run it all, taking the time before and after to measure performance.
We will look at spherocylinders with L/D = 5; lengths are measured in units of the diameter D, so the half-length parameter is xld2 = L/(2D) = 2.5.
twopi = 2.0*np.pi
xld2 = 2.5
t0 = time.process_time()
B2 = onsager_B2(200,200,21)
output_table = pd.DataFrame(B2)
output_table = output_table[["S2", "B2", "error"]]
#print(output_table)
t1 = time.process_time()
print('')
print('CPU time for loops in Python:', t1-t0)
This took about 1 minute for the 200*200 = 40,000 trial configurations per order-parameter value. Not that fast.
Now we rewrite these functions to do as much of the computation as possible on whole arrays at once. This makes everything much faster, since more of the work is then done in the compiled C/Fortran code that NumPy is built on, rather than in Python loops.
def overlap_vector(sep, ui, uj, n_loop):
    """Test for overlaps between particles, for n_loop configurations at once"""
    rui = np.sum(sep*ui, axis=1)
    ruj = np.sum(sep*uj, axis=1)
    uij = np.sum(ui*uj, axis=1)
    sinsq = 1.0 - uij*uij
    ci = np.empty(n_loop)
    cj = np.empty(n_loop)
    # boolean masks select the configurations to which each branch of the scalar version applies
    listings = sinsq < 1e-14
    nlistings = ~listings
    ci[listings] = -rui[listings]*0.5
    cj[listings] = ruj[listings]*0.5
    ci[nlistings] = (-rui[nlistings]+uij[nlistings]*ruj[nlistings])/sinsq[nlistings]
    cj[nlistings] = (ruj[nlistings]-uij[nlistings]*rui[nlistings])/sinsq[nlistings]
    listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) & (np.absolute(ci) >= np.absolute(cj))
    ci[listings] = xld2*np.sign(ci[listings])
    cj[listings] = ruj[listings] + ci[listings]*uij[listings]
    listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) & (np.absolute(ci) < np.absolute(cj))
    cj[listings] = xld2*np.sign(cj[listings])
    ci[listings] = -rui[listings] + cj[listings]*uij[listings]
    listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) & (np.absolute(ci) > xld2)
    ci[listings] = xld2*np.sign(ci[listings])
    listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) & (np.absolute(cj) > xld2)
    cj[listings] = xld2*np.sign(cj[listings])
    di = 2.0*rui + ci - cj*uij
    dj = -2.0*ruj + cj - ci*uij
    sepsq = np.sum(sep*sep, axis=1)
    over = sepsq + ci*di + cj*dj < 1.0
    return over
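As a quick consistency check (the test parameters here are arbitrary), we can compare the vectorized routine with the scalar overlap() on a batch of random configurations; apart from possible rare edge cases in the clamping logic, the two should give the same answers:
n_test = 1000
sep_test = np.random.uniform(-1.5, 1.5, (n_test, 3))
u1_test = np.array([iso() for _ in range(n_test)])
u2_test = np.array([iso() for _ in range(n_test)])
over_vec = overlap_vector(sep_test, u1_test, u2_test, n_test)
over_loop = np.array([overlap(sep_test[i], u1_test[i], u2_test[i]) for i in range(n_test)])
print(np.sum(over_vec), np.sum(over_loop), np.sum(over_vec != over_loop))   # counts should match; last number should be 0, or very nearly so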
def iso_vector(n_loop):
    """Generate n_loop unit vectors in the isotropic phase"""
    u = np.empty((n_loop, 3))
    u[:,2] = np.random.uniform(0, 1, n_loop)
    x = np.random.uniform(0, twopi, n_loop)
    fac = np.sqrt(1.0-u[:,2]*u[:,2])
    u[:,0] = fac*np.cos(x)
    u[:,1] = fac*np.sin(x)
    return u
def nem_vector(n_loop):
    """Generate n_loop unit vectors in the perfectly aligned nematic phase"""
    u = np.zeros((n_loop, 3))
    u[:,2] = 1.0
    return u
def onsager_vector(S2, alpha, sh_alpha, n_loop):
    """Generate n_loop unit vectors from the Onsager trial function for a given nematic order parameter"""
    u = np.empty((n_loop, 3))
    x = np.random.uniform(0, sh_alpha, n_loop)
    u[:,2] = np.log(x + np.sqrt(1.0 + x*x))/alpha
    x = np.random.uniform(0, twopi, n_loop)
    fac = np.sqrt(1.0-u[:,2]*u[:,2])
    u[:,0] = fac*np.cos(x)
    u[:,1] = fac*np.sin(x)
    return u
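And again a quick check of the vectorized Onsager sampler (arbitrary test values): the vectors should have unit length, and their average of (3 cos^2 theta - 1)/2 should reproduce the requested order parameter:
S2_target = 0.6                                            # arbitrary test value
alpha = find_alpha(S2_target)
u_test = onsager_vector(S2_target, alpha, np.sinh(alpha), 100000)
print(np.allclose(np.sum(u_test*u_test, axis=1), 1.0))     # True: all unit vectors
print(S2_target, np.mean(1.5*u_test[:,2]**2 - 0.5))        # should agree within sampling error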
def onsager_B2_vectorized(n_loop, n_points):
    """Calculate the second virial coefficient, using n_loop configurations for each of the n_points values of S2"""
    rmax = 2.0*xld2 + 1.0
    fact = 4.0*rmax*rmax*rmax
    output = {}
    output['S2'] = np.zeros(n_points)
    output['B2'] = np.zeros(n_points)
    output['error'] = np.zeros(n_points)
    r1 = np.zeros(3)
    for i_s in range(n_points):
        r2 = np.random.uniform(-rmax, rmax, (n_loop,3))
        S2 = i_s/(n_points-1)
        if S2 < 1.0 and S2 > 0.0:
            alpha = find_alpha(S2)
            sh_alpha = np.sinh(alpha)
        if S2 < 1e-10:
            u1 = iso_vector(n_loop)
            u2 = iso_vector(n_loop)
        elif S2 > 1.0-1e-10:
            u1 = nem_vector(n_loop)
            u2 = nem_vector(n_loop)
        else:
            u1 = onsager_vector(S2, alpha, sh_alpha, n_loop)
            u2 = onsager_vector(S2, alpha, sh_alpha, n_loop)
        n_over = overlap_vector(r2-r1, u1, u2, n_loop)
        # equivalent loop version, for comparison:
        # n_over = np.zeros(n_loop)
        # for i in range(n_loop):
        #     if overlap(r2[i,:]-r1[:], u1[i,:], u2[i,:]):
        #         n_over[i] += 1.0
        B2 = fact*np.sum(n_over)/n_loop
        error = fact*n_over.std()/np.sqrt(n_loop-1)
        output['S2'][i_s] = S2
        output['B2'][i_s] = B2
        output['error'][i_s] = error
        print(output['S2'][i_s], output['B2'][i_s], output['error'][i_s])
    return output
Now we run it all. Previously we used n_trial = n_batch = 200, giving 200*200 = 40,000 configurations per order-parameter value.
This time we use n_loop = 2000*2000 = 4,000,000 configurations.
twopi = 2.0*np.pi
xld2 = 2.5
t0 = time.process_time()
B2_vec = onsager_B2_vectorized(2000*2000,21)
output_table_vec = pd.DataFrame(B2_vec)
output_table_vec = output_table_vec[["S2", "B2", "error"]]
#print(output_table_vec)
t1 = time.process_time()
print('')
print('CPU time for vectorized method:', t1-t0)
We achieved a similar run time of about 1 minute, but we generated 100x more trial configurations, which means the error on our result is about 10 times smaller.
We now plot the two results, from which the reduction in the error should be visible.
plt.errorbar(output_table.S2, output_table.B2, output_table.error, color="red",
             label="original", marker=".", markersize=10, elinewidth=2)
plt.errorbar(output_table_vec.S2, output_table_vec.B2, output_table_vec.error,
             color="blue", label="vectorized", marker="s", markersize=7, elinewidth=2)
plt.legend(loc="lower left")
plt.xlabel("S2")
plt.ylabel("B2")
plt.margins(0.02)
plt.title("B2 vs S2: after 1 minute computation time")
plt.show()
Of course, the method using loops can also be coded in Fortran, where it runs about 3.5 times faster than the Python above (so it reduces the error still further, by about a factor of 1.9, for the same computing time). But if we do want to use Python, vectorizing is the way to go.
Let's now add our external Fortran result to the plot.
fortran_result=pd.read_csv("B2_L5d0_D1d0-race-csv.dat", header=None, names=["S2", "alpha", "B2", "error"])
print(fortran_result)
plt.errorbar(output_table.S2, output_table.B2, output_table.error, color="red",
             label="original", marker=".", markersize=10, elinewidth=2)
plt.errorbar(output_table_vec.S2, output_table_vec.B2, output_table_vec.error,
             color="blue", label="vectorized", marker="s", markersize=7, elinewidth=2)
plt.errorbar(fortran_result.S2, fortran_result.B2, fortran_result.error, color="green",
             label="fortran", marker="v", markersize=10, elinewidth=2)
plt.legend(loc="lower left")
plt.xlabel("S2")
plt.ylabel("B2")
plt.margins(0.02)
plt.title("B2 vs S2: after 1 minute computation time")
plt.show()
plt.scatter(output_table.S2, (output_table.error/output_table.B2)*100.0, color="red",
            label="original", marker=".", s=120)
plt.scatter(output_table_vec.S2, (output_table_vec.error/output_table_vec.B2)*100.0,
            color="blue", label="vectorized", marker=",", s=60)
plt.scatter(fortran_result.S2, (fortran_result.error/fortran_result.B2)*100.0,
            color="green", label="fortran", marker="v", s=60)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel("S2")
plt.ylabel("error (%)")
plt.margins(0.02)
plt.title("% error in B2 vs S2: after 1 minute computation time")
plt.show()
And finally, we import a Fortran function we made to calculate B2, compiled with f2py so that it can be called directly from Python.
This is essentially identical to the Fortran code we ran externally, just adapted so that it can be wrapped as a Python module.
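For reference, a module like this is built with NumPy's f2py tool; assuming the Fortran source lives in a file called onsager_b2.f90 (the file name here is just a guess), the build command would look something like
python -m numpy.f2py -c onsager_b2.f90 -m onsager_B2
which produces an extension module that can be imported under the name onsager_B2, as below.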
import onsager_B2 #import the Fortran module
t0 = time.process_time()
output_table_f2py = pd.DataFrame(np.column_stack(onsager_B2.onsager_b2(5.0,1.0, 10000*1200, 21)),columns=['S2','B2', 'error'])
print(output_table_f2py)
t1 = time.process_time()
print('')
print('CPU time for imported Fortran function:', t1-t0)
This turns out to be a little slower than the external Fortran program, but not by much (a few per cent, perhaps).
Now we can plot all of these results together.
fortran_result=pd.read_csv("B2_L5d0_D1d0-race-csv.dat", header=None, names=["S2", "alpha", "B2", "error"])
plt.errorbar(output_table.S2, output_table.B2, output_table.error, color="red",
             label="original", marker=".", markersize=10, elinewidth=2)
plt.errorbar(output_table_vec.S2, output_table_vec.B2, output_table_vec.error,
             color="blue", label="vectorized", marker="s", markersize=7, elinewidth=2)
plt.errorbar(fortran_result.S2, fortran_result.B2, fortran_result.error, color="green",
             label="fortran (ext)", marker="v", markersize=10, elinewidth=2)
plt.errorbar(output_table_f2py.S2, output_table_f2py.B2, output_table_f2py.error,
             color="cyan", label="fortran (int)", marker="*", markersize=10, elinewidth=2)
plt.legend(loc="lower left")
plt.xlabel("S2")
plt.ylabel("B2")
plt.margins(0.02)
plt.title("B2 vs S2: after 1 minute computation time")
plt.show()
plt.scatter(output_table.S2, (output_table.error/output_table.B2)*100.0, color="red",
            label="original", marker=".", s=120)
plt.scatter(output_table_vec.S2, (output_table_vec.error/output_table_vec.B2)*100.0,
            color="blue", label="vectorized", marker=",", s=60)
plt.scatter(fortran_result.S2, (fortran_result.error/fortran_result.B2)*100.0,
            color="green", label="fortran (ext)", marker="v", s=60)
plt.scatter(output_table_f2py.S2, (output_table_f2py.error/output_table_f2py.B2)*100.0,
            color="cyan", label="fortran (int)", marker="*", s=90)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel("S2")
plt.ylabel("error (%)")
plt.margins(0.02)
plt.title("% error in B2 vs S2: after 1 minute computation time")
plt.show()