1 #!/usr/bin/env python3 2 3 ## README ##################################################################### 4 # 5 # This script is meant to measure pi-pi stacking between the solute and solvent 6 # atoms. It will evaluate only those solvent atoms within a certain cutoff, 7 # defined by the user (below). 8 9 cutoff = 10.0 10 11 # This cutoff distance is based upon the approximated center of the ring 12 # containing the pi system. The center is approximated by using two of the 13 # three coordinates given by the user. The two atoms which will approximate the 14 # center need to be the second and third atoms defined in the lists below. The 15 # atoms are defined by the atom labels as they appear in the pdb file. Please 16 # note, this script works with pdb files ONLY. 17 18 solute_atoms = ['C0C', 'C0E', 'C09'] 19 solvent_atoms = ['C02', 'N04', 'C05'] 20 21 # Please ensure that all three atoms are in the aromatic ring, as these three 22 # atoms will be used to form the plane in which the pi system lies. 23 # 24 # The script will find the angles between the planes involved in the (hopefully 25 # found) pi-pi stacking, as well as take an average and standard deviation. 26 # Output for every solvent molecule evaluated is printed in an output file 27 # named 'output_pi-pi.txt'. 28 # 29 # !!The above two variables should be all that is required for the user to 30 # change!! 31 # 32 ## INVOCATION ################################################################# 33 # 34 # Ensure that the script is marked executable or explicitly invoke python 35 # (version 3.2 minimal) to run the script. Any pdb file which you'd like to 36 # analyze should be given as an argument. Shell expansion is handled 37 # appropriately. 38 # 39 # Example: 40 # ./pipi.py *pdb 41 # ./pipi.py d50plt5 d50plt10 d50plt15 42 # 43 # Note that the filenames do not need a pdb suffix, but the script relies on 44 # the pdb format. 45 ############################################################################## 46 # 47 # Author: Billy Wayne McCann 48 # email : thebillywayne@gmail.com 49 # NOTE: My code is purposefully verbose. don't hate. 50 ############################################################################### 51 52 from sys import argv, exit 53 from math import pow, degrees, sqrt, acos 54 from sysconfig import get_python_version 55 56 # make sure we're using at least version 3.2 of Python 57 if float(get_python_version()) < 3.2: 58 print("Requires Python 3.2 or higher.") 59 exit(0) 60 61 # make sure arguments have been given to the script 62 if len(argv[1:]) == 0: 63 print("Give pdb files as an argument to this script.") 64 exit(0) 65 else: 66 pdbs = argv[1:] 67 68 # species are separated in the pdb file by 'TER ' 69 ter = 'TER ' 70 71 # file in which to place output 72 output = open('output_pi-pi.txt', 'w') 73 74 ### FUNCTIONS ############################################################### 75 76 def find_TERs(content): 77 ''' Find where the TER's in the pdb file occur''' 78 79 terlines = [] 80 terline = content.index(ter) 81 terlines.append(terline) 82 while True: 83 try: 84 terline = content.index(ter, terline+1) 85 terlines.append(terline) 86 except: 87 return terlines 88 89 def get_coordinates(species, atoms): 90 ''' Find coordinates of atoms of interest of the species of interest, 91 whether the solute or the solvent. The atoms should have been defined in 92 the solute_atoms and solvent_atoms variables''' 93 94 atom_arrays = [] 95 coordinates = [] 96 97 # scan through species for specific atom entry and store into atom_arrays 98 for entry in species: 99 for atom in atoms:100 if atom in entry:101 atom_arrays.append(entry)102 103 # extract only the desired elements from the atom_arrays and store them in104 # coordinates list.105 for element in atom_arrays:106 array = element.split()107 coordinates.append(array[4])108 coordinates.append(array[5:8])109 110 # in binary solvents, the coordinates will sometimes not be found in a111 # particular solvent molecule. return None and test for it in the main112 # body.113 if not coordinates:114 return [None,None] 115 116 # the molecular id is the first entry in the coordinates list117 mol_id = coordinates[0]118 119 # the actual coordinates are the even entries in the coordinates list120 coordinates = [coordinates[i] for i in range(len(coordinates)) if float(i)121 % 2 != 0]122 return mol_id, coordinates123 124 def vectorize(coordinate1, coordinate2):125 '''Take coordinates and return a vector'''126 127 # extract x, y, z values from coordinates128 # make them floats for the subtraction operations in the return statement129 x1 = float(coordinate1[0])130 x2 = float(coordinate2[0])131 y1 = float(coordinate1[1])132 y2 = float(coordinate2[1])133 z1 = float(coordinate1[2])134 z2 = float(coordinate2[2])135 return [x2-x1, y2-y1, z2-z1]136 137 def dotproduct(vector1, vector2):138 '''Return the dot product between two vectors'''139 140 return vector1[0]*vector2[0]+vector1[1]*vector2[1]+vector1[2]*vector2[2]141 142 def crossproduct(vector1, vector2):143 '''Find the cross product between two vectors'''144 145 return [vector1[1]*vector2[2]-vector1[2]*vector2[1],146 vector1[2]*vector2[0]-vector1[0]*vector2[2],147 vector1[0]*vector2[1]-vector1[1]*vector2[0]]148 149 def magnitude(vector):150 '''Return the magnitude of a vector'''151 152 return sqrt(vector[0]*vector[0]+vector[1]*vector[1]+153 vector[2]*vector[2])154 155 def unit(vector):156 '''Return the unit vector of a vector'''157 158 mag = magnitude(vector)159 unit_vector = []160 for scalar in vector:161 unit_vector.append(scalar/mag)162 return unit_vector163 164 def center(coordinate1, coordinate2):165 ''' Given two coordinates, find the midpoint between them. 166 This function is used to approximate the center of a species. '''167 x1 = float(coordinate1[0])168 x2 = float(coordinate2[0])169 y1 = float(coordinate1[1])170 y2 = float(coordinate2[1])171 z1 = float(coordinate1[2])172 z2 = float(coordinate2[2])173 174 return [(x2+x1)/2, (y2+y1)/2, (z2+z1)/2]175 176 def distance(coordinate1, coordinate2):177 ''' Find the distance between two points'''178 x1 = float(coordinate1[0])179 x2 = float(coordinate2[0])180 y1 = float(coordinate1[1])181 y2 = float(coordinate2[1])182 z1 = float(coordinate1[2])183 z2 = float(coordinate2[2])184 185 return sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2)186 187 def normal(coordinates):188 ''' Given three coordinates, find the normal to the plane created by the189 three coordinates'''190 191 vector1 = vectorize(coordinates[0], coordinates[1])192 vector2 = vectorize(coordinates[1], coordinates[2])193 194 return crossproduct(vector1, vector2)195 196 def calculate_angle(normal1, normal2):197 ''' Calculate the angle between two planes.'''198 199 # make normals into unit vectors first200 normal1 = unit(normal1)201 normal2 = unit(normal2)202 203 # the angle between the two planes of atomic coordinates204 # is the arccosine of dot product of the two normals.205 return degrees(acos(dotproduct(normal1, normal2)))206 207 def stddev(all_values, average_value):208 '''Find the standard deviation of the angles'''209 210 # create a new list which is the difference between the average of the211 # values and each individual value.212 square_diff_from_average = [pow((i-average_value), 2) for i in all_values]213 214 # the standard deviation is square root of the sum of the differences215 # between each each value and the average squared divided by the number of216 # values in the list ('population' standard deviation).217 return sqrt(sum(square_diff_from_average)/len(square_diff_from_average))218 219 ## MAIN ######################################################################220 221 # initialize dictionary to store solvent data into222 solvent_data = {} 223 224 print("Using a cutoff of {0} Angstroms.".format(cutoff))225 output.write("Using a cutoff of {0} Angstroms.\n\n".format(cutoff))226 227 for pdb in pdbs:228 with open(pdb, 'r') as pdb_file:229 contents = pdb_file.read().split('\n')230 TERlines = find_TERs(contents)231 232 # find solutes coordinates, center, and normal vector233 solute = contents[0:TERlines[0]]234 solute_coordinates = get_coordinates(solute, solute_atoms)[1]235 solute_center = center(solute_coordinates[1], solute_coordinates[2]) 236 solute_normal = normal(solute_coordinates)237 238 solvents = []239 for i in range(len(TERlines)-1):240 solvent = contents[TERlines[i]:TERlines[i+1]]241 solvents.append(solvent)242 243 # keep track of how many solvents are within cutoff in each pdb244 j = 0245 for solvent in solvents:246 solvent_name, solvent_coordinates = get_coordinates(solvent, solvent_atoms)247 248 if solvent_coordinates is None:249 continue250 251 solvent_name = int(solvent_name)252 solvent_center = center(solvent_coordinates[1],253 solvent_coordinates[2])254 radius = distance(solute_center, solvent_center)255 256 if radius < cutoff:257 solvent_normal = normal(solvent_coordinates)258 angle = calculate_angle(solute_normal, solvent_normal)259 if solvent_name not in solvent_data:260 solvent_data[solvent_name] = { 'pdb': [], 261 'distance': [], 'angle': []}262 solvent_data[solvent_name]['pdb'].append(pdb)263 solvent_data[solvent_name]['distance'].append(radius)264 solvent_data[solvent_name]['angle'].append(angle)265 j += 1266 267 else:268 continue269 270 print("PDB {0} contains {1} solvent(s) within the cutoff.".format(pdb, j))271 output.write("PDB {0} contains {1} solvent(s) within the cutoff.\n".format(pdb, j))272 273 # end of for pdb loop274 275 output.write("\n")276 277 # analyze the accepted solvent data.278 for accepted in sorted(solvent_data):279 output.write('Solvent ID: {0}\n'.format(accepted))280 281 for index in range(len(solvent_data[accepted]['pdb'])):282 output.write('+ PDB name: {0}\t\t'.format(solvent_data[accepted]['pdb'][index]))283 output.write('Distance: {0:.2f}\t'.format(solvent_data[accepted]['distance'][index]))284 output.write('Angle: {0:.2f}\t\n'.format(solvent_data[accepted]['angle'][index]))285 286 output.write('== AVERAGES ==\n')287 average_distance = sum(solvent_data[accepted]['distance'])/len(solvent_data[accepted]['distance'])288 distance_stddev = stddev(solvent_data[accepted]['distance'],289 average_distance)290 average_angle = sum(solvent_data[accepted]['angle'])/len(solvent_data[accepted]['angle'])291 angle_stddev = stddev(solvent_data[accepted]['angle'], average_angle)292 output.write("Distance:\t{0:.2f} +- ".format(average_distance))293 output.write("{0:.2f}.\n".format(distance_stddev))294 output.write("Angle: \t{0:.2f} +- ".format(average_angle))295 output.write("{0:.2f}.\n\n".format(angle_stddev))296 297 output.close()298 exit(0)