Source code for mdtraj.geometry.sasa

##############################################################################
# MDTraj: A Python Library for Loading, Saving, and Manipulating
#         Molecular Dynamics Trajectories.
# Copyright 2012-2013 Stanford University and the Authors
#
# Authors: Robert McGibbon
# Contributors:
#
# MDTraj is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with MDTraj. If not, see <http://www.gnu.org/licenses/>.
##############################################################################


from copy import deepcopy

import numpy as np

from mdtraj.geometry import _geometry
from mdtraj.utils import ensure_type

__all__ = ["shrake_rupley"]

# These van der waals radii are taken from
# https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)
# which references
# A. Bondi (1964). "van der Waals Volumes and Radii". J. Phys. Chem. 68:
#   441. doi:10.1021/j100785a001 and doi:10.1021/jp8111556.
# M. Mantina et al. (2009). "Consistent van der Waals Radii for the Whole Main Group".
#   J. Phys. Chem. A. 113 (19): 5806--12. doi:10.1021/jp8111556
# Where no van der Waals value is known, a default of 2 angstroms is used.
# However, because certain atoms in biophysical simulations have a high
# chance of being completely ionized, we have decided to give the
# following atoms their ionic radii their ionic radii:
# +2: Be, Mg, Ca, Ba
# +1: Li, Na, K, Cs
# -1: Cl
# These ionic radii are were taken from:
# Shannon, R. D. Revised effective ionic radii and systematic studies of interatomic distances in
#   halides and chalcogenides. Acta Crystallographica Section A 32, 751--767 (1976).
#   doi:10.1107/S0567739476001551
# For most atoms, adding electrons usually doesn't change the radius much
# (<10%), while removing them changes it substantially (>50%). Further,
# when atoms like N, S, and P, are positive, they are bound to atoms in such
# a way that would "hide" their radii anyway. We have therefore chosen to just
# use their vdW radii.

_ATOMIC_RADII = {
    "H": 0.120,
    "He": 0.140,
    "Li": 0.076,
    "Be": 0.059,
    "B": 0.192,
    "C": 0.170,
    "N": 0.155,
    "O": 0.152,
    "F": 0.147,
    "Ne": 0.154,
    "Na": 0.102,
    "Mg": 0.086,
    "Al": 0.184,
    "Si": 0.210,
    "P": 0.180,
    "S": 0.180,
    "Cl": 0.181,
    "Ar": 0.188,
    "K": 0.138,
    "Ca": 0.114,
    "Sc": 0.211,
    "Ti": 0.200,
    "V": 0.200,
    "Cr": 0.200,
    "Mn": 0.200,
    "Fe": 0.200,
    "Co": 0.200,
    "Ni": 0.163,
    "Cu": 0.140,
    "Zn": 0.139,
    "Ga": 0.187,
    "Ge": 0.211,
    "As": 0.185,
    "Se": 0.190,
    "Br": 0.185,
    "Kr": 0.202,
    "Rb": 0.303,
    "Sr": 0.249,
    "Y": 0.200,
    "Zr": 0.200,
    "Nb": 0.200,
    "Mo": 0.200,
    "Tc": 0.200,
    "Ru": 0.200,
    "Rh": 0.200,
    "Pd": 0.163,
    "Ag": 0.172,
    "Cd": 0.158,
    "In": 0.193,
    "Sn": 0.217,
    "Sb": 0.206,
    "Te": 0.206,
    "I": 0.198,
    "Xe": 0.216,
    "Cs": 0.167,
    "Ba": 0.149,
    "La": 0.200,
    "Ce": 0.200,
    "Pr": 0.200,
    "Nd": 0.200,
    "Pm": 0.200,
    "Sm": 0.200,
    "Eu": 0.200,
    "Gd": 0.200,
    "Tb": 0.200,
    "Dy": 0.200,
    "Ho": 0.200,
    "Er": 0.200,
    "Tm": 0.200,
    "Yb": 0.200,
    "Lu": 0.200,
    "Hf": 0.200,
    "Ta": 0.200,
    "W": 0.200,
    "Re": 0.200,
    "Os": 0.200,
    "Ir": 0.200,
    "Pt": 0.175,
    "Au": 0.166,
    "Hg": 0.155,
    "Tl": 0.196,
    "Pb": 0.202,
    "Bi": 0.207,
    "Po": 0.197,
    "At": 0.202,
    "Rn": 0.220,
    "Fr": 0.348,
    "Ra": 0.283,
    "Ac": 0.200,
    "Th": 0.200,
    "Pa": 0.200,
    "U": 0.186,
    "Np": 0.200,
    "Pu": 0.200,
    "Am": 0.200,
    "Cm": 0.200,
    "Bk": 0.200,
    "Cf": 0.200,
    "Es": 0.200,
    "Fm": 0.200,
    "Md": 0.200,
    "No": 0.200,
    "Lr": 0.200,
    "Rf": 0.200,
    "Db": 0.200,
    "Sg": 0.200,
    "Bh": 0.200,
    "Hs": 0.200,
    "Mt": 0.200,
    "Ds": 0.200,
    "Rg": 0.200,
    "Cn": 0.200,
    "Uut": 0.200,
    "Fl": 0.200,
    "Uup": 0.200,
    "Lv": 0.200,
    "Uus": 0.200,
    "Uuo": 0.200,
}


[docs] def shrake_rupley( traj, probe_radius=0.14, n_sphere_points=960, mode="atom", change_radii=None, get_mapping=False, atom_indices=None ): """Compute the solvent accessible surface area of each atom or residue in each simulation frame. Parameters ---------- traj : Trajectory An mtraj trajectory. probe_radius : float, optional The radius of the probe, in nm. n_sphere_points : int, optional The number of points representing the surface of each atom, higher values leads to more accuracy. mode : {'atom', 'residue'} In mode == 'atom', the extracted areas are resolved per-atom In mode == 'residue', this is consolidated down to the per-residue SASA by summing over the atoms in each residue. change_radii : dict, optional A partial or complete dict containing the radii to change from the defaults. Should take the form {"Symbol" : radii_in_nm }, e.g. {"Cl" : 0.175 } to change the radii of Chlorine to 175 pm. get_mapping : bool, optional Instead of returning only the areas, also return the indices of the atoms or the residue-to-atom mapping. If True, will return a tuple that contains the areas and the mapping (np.array, shape=(n_atoms)). atom_indices : iterable, optional Selection of atoms indices for which the SASA will be computed. Default is all atoms, but a sub-selection of atoms can be passed here. This selection doesn't affect what atoms are considered accessibility blockers, it only affects for what atoms the SASA is computed. E.g. you can pass a lipid-embedded protein (s.t. the lipids are considered blockers) but only compute SASA for the protein using atom_selection = traj.top.select("protein"). The excluded atoms/residues get a SASA value -1. Returns ------- areas : np.array, shape=(n_frames, n_features) The accessible surface area of each atom or residue in every frame. If mode == 'atom', the second dimension will index the atoms in the trajectory, whereas if mode == 'residue', the second dimension will index the residues. Notes ----- This code implements the Shrake and Rupley algorithm, with the Golden Section Spiral algorithm to generate the sphere points. The basic idea is to great a mesh of points representing the surface of each atom (at a distance of the van der waals radius plus the probe radius from the nuclei), and then count the number of such mesh points that are on the molecular surface -- i.e. not within the radius of another atom. Assuming that the points are evenly distributed, the number of points is directly proportional to the accessible surface area (its just 4*pi*r^2 time the fraction of the points that are accessible). There are a number of different ways to generate the points on the sphere -- possibly the best way would be to do a little "molecular dyanmics" : put the points on the sphere, and then run MD where all the points repel one another and wait for them to get to an energy minimum. But that sounds expensive. This code uses the golden section spiral algorithm (picture at http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/) where you make this spiral that traces out the unit sphere and then put points down equidistant along the spiral. It's cheap, but not perfect. The gromacs utility g_sas uses a slightly different algorithm for generating points on the sphere, which is based on an icosahedral tesselation. roughly, the icosahedral tesselation works something like this http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html References ---------- .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71. """ xyz = ensure_type( traj.xyz, dtype=np.float32, ndim=3, name="traj.xyz", shape=(None, None, 3), warn_on_cast=False, ) if mode == "atom": dim1 = xyz.shape[1] atom_mapping = np.arange(dim1, dtype=np.int32) elif mode == "residue": dim1 = traj.n_residues atom_mapping = np.array( [a.residue.index for a in traj.top.atoms], dtype=np.int32, ) if not np.all( np.unique(atom_mapping) == np.arange(np.int64(1) + np.max(atom_mapping)), ): raise ValueError( "residues must have contiguous integer indices " "starting from zero", ) else: raise ValueError('mode must be one of "residue", "atom". "%s" supplied' % mode) if atom_indices is None: atom_selection_mask = np.ones(traj.n_atoms, dtype=np.int32) out = np.zeros((xyz.shape[0], dim1), dtype=np.float32) else: atom_selection_mask = np.array([[1 if ii in atom_indices else 0][0] for ii in range(traj.n_atoms)], dtype=np.int32) out = np.full((xyz.shape[0], dim1), -1, dtype=np.float32) out[:,atom_mapping[atom_indices]]=0 modified_radii = {} if change_radii is not None: # in case _ATOMIC_RADII is in use elsehwere... modified_radii = deepcopy(_ATOMIC_RADII) # Now, modify the values specified in 'change_radii' for k, v in change_radii.items(): modified_radii[k] = v if bool(modified_radii): atom_radii = [modified_radii[atom.element.symbol] for atom in traj.topology.atoms] else: atom_radii = [_ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms] radii = np.array(atom_radii, np.float32) + probe_radius _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, atom_selection_mask, out) if get_mapping is True: return out, atom_mapping else: return out