Matthew Dennison

Calculating the second virial coefficient ...

... or the importance of vectorizing

We want to calculate the second virial coefficient for a system of hard spherocylinders using Monte Carlo integration. Our aim is to compare four different methods for efficiency, namely:

  • Using loops in Python
  • Using a vectorized approach in Python
  • Using Fortran code externally
  • Using a Fortran function we import into Python
  • We shall start with the first one, using loops in Python, by coding this in as we would if we were using C or Fortran.

    First, we import some libraries/modules

    import numpy as np                     #we always need arrays
    import pandas as pd                    #we want to put our results in a table
    import matplotlib.pyplot as plt        #and we wouldn't mind plotting them
    from scipy.optimize import brentq      #we will need to find the root of a function
    import time                            #and of course, we want to time everything
    

    We now need to define a few functions: the following two are used to find the value of alpha for a given nematic order parameter

    def nematic_orderparam(alpha, S2):
        """Function to calculate the nematic order parameter for a given alpha, minus a guess"""
        return 1.0 + 3.0/(alpha*alpha) - 3.0/(alpha*np.tanh(alpha))-S2
    
    def find_alpha(S2):
        """Function to find the alpha for a given nematic order parameter"""
        return brentq(nematic_orderparam, 1e-10, 2e3, args = (S2))
    

    Now we define three functions to generate a unit vector for three different systems:

    • iso: the isotropic phase, where the particles point in each direction with equal chance
    • nem: a perfectly aligned nematic phase, where all the particles point straight up
    • onsager: generates a unit vector from the Onsager distribution function, for a given nematic order parameter
    def iso():
        """Generate a unit vector for a particle in the isotropic phase"""
        u = np.empty(3)
        u[2] = np.random.uniform()
        x = twopi*np.random.uniform()
        fac = np.sqrt(1.0-u[2]*u[2])
        u[0] = fac*np.cos(x)
        u[1] = fac*np.sin(x)
        return u
    
    def nem():
        """Generate a unit vector for a particle in a perfectly aligned nematic phase"""
        u = np.zeros(3)
        u[2] = 1.0
        return u
    
    def onsager(S2, alpha, sh_alpha):
        """Generate a unit vector from the Onsage trial function for a given nematic order parameter S2"""
        u = np.empty(3)
        x = np.random.uniform()*sh_alpha
        u[2] = np.log(x + np.sqrt(1.0 + x*x))/alpha
        x = twopi*np.random.uniform()
        fac = np.sqrt(1.0-u[2]*u[2])
        u[0] = fac*np.cos(x)
        u[1] = fac*np.sin(x)
        return u
    

    And we also define a function to test for overlaps between particles

    def overlap(sep,ui,uj):
        """Test for overlaps between two spherocylinders: returns True if they do"""
        over = False
        rui   = np.sum(sep*ui)
        ruj   = np.sum(sep*uj)
        uij   = np.sum(ui*uj)
        sinsq = 1.0 - np.sum(uij*uij)
        if sinsq < 1e-14:
            ci = -rui*0.5
            cj = ruj*0.5
        else:
            ci = (-rui+uij*ruj)/sinsq
            cj = (ruj-uij*rui)/sinsq
        if np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2:
            if np.absolute(ci) >= np.absolute(cj):
                ci = xld2*np.sign(ci)
                cj = ruj + ci*uij
            else:
                cj = xld2*np.sign(cj)
                ci = -rui + cj*uij
            if np.absolute(ci) > xld2:
                ci = xld2*np.sign(ci)
            if np.absolute(cj) > xld2:
                cj = xld2*np.sign(cj)
        di = 2.0*rui  + ci-cj*uij
        dj = -2.0*ruj + cj-ci*uij
        sepsq = np.sum(sep*sep)
        if sepsq + ci*di + cj*dj < 1.0:
            over = True
        return over
    

    And finally we need a function that calculates the value of the virial coefficients.

    • We set a number of S2 values that we want to study (n_points values, ranging from 0 to 1 in equally spaced steps).
    • We also set the number of configurations we will generate (i.e. the number of MC iterations). This is split into batches.
    • The funciton returns a dictionary containing the order parameters, the values for the virials and the errors
    def onsager_B2(n_batch,n_trial, n_points):
        """Calculate the nematic order parameter using n_batch*n_trial configurations, for n_points order parameters"""
        rmax = 2.0*xld2 + 1.0
        fac = 4.0*rmax*rmax*rmax
        r1 = np.zeros(3)
        u1 = np.zeros(3)
        u2 = np.zeros(3)
        output = {}
        output['S2'] = np.zeros(n_points)
        output['B2'] = np.zeros(n_points)
        output['error'] = np.zeros(n_points)
        for i_s in range(n_points):
            S2 = i_s/(n_points-1)
            if S2 < 1.0 and S2 > 0.0:
                alpha = find_alpha(S2)
                sh_alpha = np.sinh(alpha)
            B2 = 0.0
            B2sq  = 0.0
            error = 0.0
            for ib in range(n_batch):
                B2_sub = 0.0
                n_over = 0.0
                for it in range(n_trial):
                    r2 = np.random.uniform(-rmax,rmax,3)
                    if S2 < 1e-10:
                        u1 = iso()
                        u2 = iso()
                    elif S2 > 1.0-1e-10:
                        u1 = nem()
                        u2 = nem()
                    else:
                        u1 = onsager(S2, alpha, sh_alpha)
                        u2 = onsager(S2, alpha, sh_alpha)
                    if overlap(r2-r1, u1, u2):
                        n_over += 1.0
                B2_sub = n_over/n_trial
                B2    += B2_sub
                B2sq  += B2_sub*B2_sub
            B2 = B2/n_batch
            B2sq = B2sq/n_batch
            error = np.sqrt((B2sq-B2*B2)/(n_batch-1))
            output['S2'][i_s] =  S2
            output['B2'][i_s] =  B2*fac
            output['error'][i_s] =  error*fac
            print(output['S2'][i_s] ,output['B2'][i_s] ,output['error'][i_s] )
        return output
    

    And now we run it all. We take the time before and after to measure performance.

    We will look at spherocylinders with L/D = 5, which means the parameter xld2 = L/2 = 2.5

    twopi = 2.0*np.pi
    xld2 = 2.5
    t0 = time.clock()
    B2 = onsager_B2(200,200,21)
    output_table = pd.DataFrame(B2)
    output_table = output_table[["S2", "B2", "error"]]
    #print(output_table)
    t1 = time.clock()
    print('')
    print('CPU time for loops in Python:', t1-t0)
    
    0.0 37.0224 0.867525871017
    0.05 39.4416 0.882955286176
    0.1 35.9856 0.802147220914
    0.15 36.6552 0.819992673579
    0.2 36.936 0.877329341735
    0.25 35.748 0.848080730015
    0.3 36.1152 0.835885907918
    0.35 35.748 0.828503233573
    0.4 35.0568 0.863010337722
    0.45 33.9552 0.902769944484
    0.5 35.1648 0.851998658078
    0.55 33.8688 0.782816672352
    0.6 31.9248 0.750617276834
    0.65 32.1408 0.814635619626
    0.7 30.6936 0.762152595017
    0.75 30.6072 0.751233906792
    0.8 28.08 0.734968033353
    0.85 27.648 0.783750549999
    0.9 26.0928 0.723937294822
    0.95 23.544 0.728399249956
    1.0 18.7056 0.659021051966
    
    CPU time for loops in Python: 60.472637000000006
    

    This took about 1 minute for 200*200 = 40,000 trial configurations. Not that fast.

    Now we will rewrite these functions to do as much of the computation as we can vector-wise. This makes it all much faster, since then more of the run takes place in the compiled C/Fortran code that this is all built on, rather than in python loops.

    def overlap_vector(sep,ui,uj,n_loop):
        """Function to test for overlaps between particles, takes n_loop configurations at once"""
        rui   = np.sum(sep*ui,axis=1)
        ruj   = np.sum(sep*uj,axis=1)
        uij   = np.sum(ui*uj,axis=1)
        sinsq = 1.0 - uij*uij
        ci = np.empty(n_loop)
        cj = np.empty(n_loop)
        listings = sinsq < 1e-14
        nlistings = ~listings
        ci[listings] = -rui[listings]*0.5
        cj[listings] = ruj[listings]*0.5
        ci[nlistings] = (-rui[nlistings]+uij[nlistings]*ruj[nlistings])/sinsq[nlistings]
        cj[nlistings] = (ruj[nlistings]-uij[nlistings]*rui[nlistings])/sinsq[nlistings]
        listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) & (np.absolute(ci) >= np.absolute(cj))
        ci[listings] = xld2*np.sign(ci[listings])
        cj[listings] = ruj[listings] + ci[listings]*uij[listings]
        listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) & (np.absolute(ci) < np.absolute(cj))
        cj[listings] = xld2*np.sign(cj[listings])
        ci[listings] = -rui[listings] + cj[listings]*uij[listings]
        listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) &  (np.absolute(ci) > xld2)
        ci[listings] = xld2*np.sign(ci[listings])
        listings = (np.maximum(np.absolute(ci), np.absolute(cj)) >= xld2) &  (np.absolute(cj) > xld2)
        cj[listings] = xld2*np.sign(cj[listings])
        di = 2.0*rui  + ci-cj*uij
        dj = -2.0*ruj + cj-ci*uij
        sepsq = np.sum(sep*sep, axis=1)
        over = sepsq + ci*di + cj*dj < 1.0
        return over
    
    def iso_vector(n_loop):
        """Generates n_loop unit vectors in the isotropic phase"""
        u = np.empty((n_loop, 3))
        u[:,2] = np.random.uniform(0,1,n_loop)
        x = np.random.uniform(0,twopi,n_loop)
        fac = np.sqrt(1.0-u[:,2]*u[:,2])
        u[:,0] = fac*np.cos(x)
        u[:,1] = fac*np.sin(x)
        return u
    
    def nem_vector(n_loop):
        """Generates n_loop unit vectors in the perfectly aligned nematic phase"""
        u = np.zeros((n_loop, 3))
        u[:,2] = 1.0
        return u
    
    def onsager_vector(S2, alpha, sh_alpha, n_loop):
        """Generates n_loop unit vectors from the Onsager trial function for a given nematic order parameter"""
        u = np.empty((n_loop, 3))
        x = np.random.uniform(0,sh_alpha,n_loop)
        u[:,2] = np.log(x + np.sqrt(1.0 + x*x))/alpha
        x = np.random.uniform(0,twopi,n_loop)
        fac = np.sqrt(1.0-u[:,2]*u[:,2])
        u[:,0] = fac*np.cos(x)
        u[:,1] = fac*np.sin(x)
        return u
    
    def onsager_B2_vectorized(n_loop, n_points):
        """Calculates the second virial coefficient, using n_loop configurations for each of the n_points values of S_nem"""
        rmax = 2.0*xld2 + 1.0
        fact = 4.0*rmax*rmax*rmax
        output = {}
        output['S2'] = np.zeros(n_points)
        output['B2'] = np.zeros(n_points)
        output['error'] = np.zeros(n_points)
        B2_sub = np.empty(n_loop)
        r1 = np.zeros(3)
        for i_s in range(n_points):
            r2 = np.random.uniform(-rmax,rmax,(n_loop,3))
            S2 = i_s/(n_points-1)
            if S2 < 1.0 and S2 > 0.0:
                alpha = find_alpha(S2)
                sh_alpha = np.sinh(alpha)
            if S2 < 1e-10:
                u1 = iso_vector(n_loop)
                u2 = iso_vector(n_loop)
            elif S2 > 1.0-1e-10:
                u1 = nem_vector(n_loop)
                u2 = nem_vector(n_loop)
            else:
                u1 = onsager_vector(S2, alpha, sh_alpha, n_loop)
                u2 = onsager_vector(S2, alpha, sh_alpha, n_loop)
            n_over = overlap_vector(r2-r1, u1, u2, n_loop)
           # n_over = np.zeros(n_loop)
           # for i in range(n_loop):
           #     if overlap(r2[i,:]-r1[:], u1[i,:], u2[i,:]):
           #         n_over[i] += 1.0
            #print(np.sum(n_over), (u1*u1).sum(axis))
            B2 = fact*np.sum(n_over)/n_loop
            error = fact*n_over.std()/np.sqrt(n_loop-1)
            output['S2'][i_s] =  S2
            output['B2'][i_s] =  B2
            output['error'][i_s] =  error
            print(output['S2'][i_s] ,output['B2'][i_s] ,output['error'][i_s] )
        return output
    

    Now we run it all. Before we set n_trial = n_batch = 200, giving 200*200 = 40,000 configurations.

    Now we will use n_loop = 2000*2000 = 4,000,000 configurations

    twopi = 2.0*np.pi
    xld2 = 2.5
    t0 = time.clock()
    B2_vec = onsager_B2_vectorized(2000*2000,21)
    output_table_vec = pd.DataFrame(B2_vec)
    output_table_vec = output_table_vec[["S2", "B2", "error"]]
    #print(output_table_vec)
    t1 = time.clock()
    print('')
    print('CPU time for vectorized method:', t1-t0)
    
    0.0 37.451755102 0.0837820429342
    0.05 37.3804408163 0.0837058483699
    0.1 37.4033632653 0.0837303485435
    0.15 37.0781387755 0.0833819320596
    0.2 36.9000489796 0.0831904020829
    0.25 36.6526040816 0.0829234045625
    0.3 36.3585306122 0.0826047531538
    0.35 36.0583836735 0.082278002326
    0.4 35.4943346939 0.0816597443248
    0.45 35.0564571429 0.0811759243677
    0.5 34.413844898 0.0804596429098
    0.55 33.7296979592 0.0796886982638
    0.6 32.749322449 0.0785684020312
    0.65 32.0457795918 0.0777527735332
    0.7 31.2454530612 0.0768126368573
    0.75 30.0644571429 0.0754004052049
    0.8 29.0276571429 0.0741349146864
    0.85 27.5147755102 0.0722425186021
    0.9 25.789322449 0.0700127787904
    0.95 23.5305795918 0.0669665653808
    1.0 17.7982040816 0.058439409235
    
    CPU time for vectorized method: 60.933426999999995
    

    We acheived a similar time of 1minute, but we generated 100x more trial configurations (which means that the error on our result is 10 times smaller).

    We now plot the two results, from which the reduction in the error should be visible.

    plt.errorbar(output_table.S2, output_table.B2, output_table.error, color = "red", 
                 label="original", marker = ".", markersize='10', elinewidth=2)
    plt.errorbar(output_table_vec.S2, output_table_vec.B2, output_table_vec.error, 
                 color="blue", label = "vectorized", marker = "s", markersize='7', elinewidth=2)
    plt.legend(loc="lower left")
    plt.xlabel("S2")
    plt.ylabel("B2")
    plt.margins(0.02)
    plt.title("B2 vs S2: after 1 minute computation time")
    plt.show()
    

    Of course, the method using loops can be coded in in Fortran, and will run 3.5 times faster than above (so will reduce the error still further, by about a factor of 1.9, for the same computing time. But, if we do want to use python, vectorizing is the way to go.

    Let's now add our extermal Fortran result to the plot

    fortran_result=pd.read_csv("B2_L5d0_D1d0-race-csv.dat", header=None, names=["S2", "alpha", "B2", "error"])
    print(fortran_result)
    
          S2         alpha         B2     error
    0   0.00  0.000000e+00  37.423111  0.045164
    1   0.05  8.985869e-01  37.404843  0.047013
    2   0.10  1.321999e+00  37.369543  0.046639
    3   0.15  1.689315e+00  37.175451  0.046346
    4   0.20  2.042078e+00  36.897614  0.046444
    5   0.25  2.399357e+00  36.697721  0.046934
    6   0.30  2.774537e+00  36.255847  0.045914
    7   0.35  3.179995e+00  35.959680  0.045229
    8   0.40  3.629410e+00  35.423321  0.045134
    9   0.45  4.139693e+00  34.992185  0.045549
    10  0.50  4.733319e+00  34.294320  0.045141
    11  0.55  5.441841e+00  33.675758  0.044878
    12  0.60  6.311799e+00  32.876866  0.044084
    13  0.65  7.415565e+00  32.064706  0.042787
    14  0.70  8.872984e+00  31.135166  0.042287
    15  0.75  1.089898e+01  30.114041  0.041977
    16  0.80  1.392262e+01  28.845751  0.041103
    17  0.85  1.894427e+01  27.531175  0.040358
    18  0.90  2.896424e+01  25.803360  0.040318
    19  0.95  5.898275e+01  23.523634  0.037443
    20  1.00  1.000000e+14  17.808274  0.031975
    
    plt.errorbar(output_table.S2, output_table.B2, output_table.error, color = "red", 
                 label="original", marker = ".", markersize='10', elinewidth=2)
    
    plt.errorbar(output_table_vec.S2, output_table_vec.B2, output_table_vec.error, 
                 color="blue", label = "vectorized", marker = "s", markersize='7', elinewidth=2)
    
    plt.errorbar(fortran_result.S2, fortran_result.B2, fortran_result.error, color="green", 
                 label = "fortran", marker = "v", markersize='10', elinewidth=2)
    
    plt.legend(loc="lower left")
    plt.xlabel("S2")
    plt.ylabel("B2")
    plt.margins(0.02)
    plt.title("B2 vs S2: after 1 minute computation time")
    plt.show()
    
    plt.scatter(output_table.S2, (output_table.error/output_table.B2)*100.0, color = "red", 
                label="original", marker = ".", s=120)
    
    plt.scatter(output_table_vec.S2, (output_table_vec.error/output_table_vec.B2)*100.0, 
                color="blue", label = "vectorized", marker = ",", s=60)
    
    plt.scatter(fortran_result.S2, (fortran_result.error/fortran_result.B2)*100.0, 
                color="green", label = "fortran", marker = "v", s=60)
    
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xlabel("S2")
    plt.ylabel("error (%)")
    plt.margins(0.02)
    plt.title("% error in B2 vs S2: after 1 minute computation time")
    plt.show()
    

    And finally, we import a Fortran function we made to calculate B2, which we can call from python.

    This is identical to the Fortran code we used externally, just adapted to be readible by Python.

    import onsager_B2    #import the Fortran module
    
    t0 = time.clock()
    output_table_f2py = pd.DataFrame(np.column_stack(onsager_B2.onsager_b2(5.0,1.0, 10000*1200, 21)),columns=['S2','B2', 'error'])
    #output_table_vec = output_table_vec[["S2", "B2", "error"]]
    print(output_table_f2py)
    t1 = time.clock()
    print('')
    print('CPU time for imported Fortran function:', t1-t0)
    
          S2         B2     error
    0   0.00  37.394208  0.050657
    1   0.05  37.432080  0.050750
    2   0.10  37.335960  0.050729
    3   0.15  37.176408  0.050780
    4   0.20  36.933480  0.050503
    5   0.25  36.680904  0.050325
    6   0.30  36.325224  0.050029
    7   0.35  35.924112  0.049892
    8   0.40  35.434224  0.049566
    9   0.45  34.973136  0.049173
    10  0.50  34.308720  0.048584
    11  0.55  33.581160  0.048283
    12  0.60  32.934744  0.047841
    13  0.65  32.082912  0.047029
    14  0.70  31.138488  0.046450
    15  0.75  30.171888  0.045802
    16  0.80  28.918296  0.044765
    17  0.85  27.431136  0.043744
    18  0.90  25.785360  0.042369
    19  0.95  23.481648  0.040535
    20  1.00  17.850168  0.035381
    
    CPU time for imported Fortran function: 60.74180999999999
    

    This turns out to be a little slower than the external function, but not by much (a few percent maybe)

    Now we can plot all these

    fortran_result=pd.read_csv("B2_L5d0_D1d0-race-csv.dat", header=None, names=["S2", "alpha", "B2", "error"])
    
    plt.errorbar(output_table.S2, output_table.B2, output_table.error, color = "red", 
                 label="original", marker = ".", markersize='10', elinewidth=2)
    
    plt.errorbar(output_table_vec.S2, output_table_vec.B2, output_table_vec.error, 
                 color="blue", label = "vectorized", marker = "s", markersize='7', elinewidth=2)
    
    plt.errorbar(fortran_result.S2, fortran_result.B2, fortran_result.error, color="green", 
                 label = "fortran (ext)", marker = "v", markersize='10', elinewidth=2)
    
    plt.errorbar(output_table_f2py.S2, output_table_f2py.B2, output_table_f2py.error, 
                 color="cyan", label = "fortran (int)", marker = "*", markersize='10', elinewidth=2)
    
    plt.legend(loc="lower left")
    plt.xlabel("S2")
    plt.ylabel("B2")
    plt.margins(0.02)
    plt.title("B2 vs S2: after 1 minute computation time")
    plt.show()
    
    plt.scatter(output_table.S2, (output_table.error/output_table.B2)*100.0, color = "red", 
                label="original", marker = ".", s=120)
    
    plt.scatter(output_table_vec.S2, (output_table_vec.error/output_table_vec.B2)*100.0, 
                color="blue", label = "vectorized", marker = ",", s=60)
    
    plt.scatter(fortran_result.S2, (fortran_result.error/fortran_result.B2)*100.0, 
                color="green", label = "fortran (ext)", marker = "v", s=60)
    
    plt.scatter(output_table_f2py.S2, (output_table_f2py.error/output_table_f2py.B2)*100.0, 
                color="cyan", label = "fortran (int)", marker = "*", s=90)
    
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xlabel("S2")
    plt.ylabel("error (%)")
    plt.margins(0.02)
    plt.title("% error in B2 vs S2: after 1 minute computation time")
    plt.show()