|
# This module handles input and output of PDB files.
# It deals only with the file format, not with the
# structure of any objects in such files beyond
# atoms and residues.
#
# Written by: Konrad Hinsen
# Last revision: 1996-3-5
#
"""This module provides classes that represent PDB files and
configurations contained in PDB files. It provides access
to PDB files on two levels: low-level (line by line) and
high-level (residues and atoms).
Class PDBFile:
--------------
Creation:
PDBFile(filename, mode)
mode is 'r' for reading and 'w' for writing, default is 'r'
If the filename ends with '.Z' or '.gz', it is automatically
compressed or uncompressed using 'compress' or 'gzip'.
The filename may contain the abbreviations '~' or '~user'
to refer to home directories.
Low-level access methods:
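readLine()
returns the next non-blank line as a tuple whose first element
contains the line type identifier. For lines of type 'ATOM' and
'HETATM', the remaining elements are the atom serial number
(integer), the atom name, the residue name, the residue number
(integer), the position (an object of class Vector), and the
numbers read from the two fields following the coordinates
(occupancy and temperature factor in the PDB format). For other
lines, the second element of the tuple contains the part of the
line after the type field as a single string. At the end of the
file, the tuple ('END', '') is returned.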
writeComment(text)
writes the supplied text into one or several comment lines.
Each line of the text is prefixed with 'REMARK' and written
to the file.
nextResidue(name)
must be called in between writeAtom calls to indicate the
beginning of a new residue. The argument is the residue
identifier.
writeAtom(name, position)
writes a HETATM line for an atom with the given name and
position. The position is given as an instance of class Vector.
High-level access methods:
readConfiguration()
reads all ATOM and HETATM lines, ignoring anything else.
The result is a list of Residue objects (see below).
readSequence()
returns a list of residue identifiers.
writeConfiguration(conf)
writes the supplied configuration (a list of Residue objects)
to the file. This method can be called several times with
parts of a configuration; however, each residue must be
written in a single call.
General methods:
close()
*must* be called after writing to a file, should be called after
reading to avoid memory leaks.
Class PDBConfiguration:
-----------------------
Creation:
PDBConfiguration(filename)
to generate a configuration by reading a file (see PDBFile).
PDBConfiguration(residue_list)
to generate a configuration from a given list of Residue
objects (see below).
PDBConfiguration()
to generate an empty configuration.
Access (assuming c is an instance of PDBConfiguration):
len(c)
returns the number of Residues in the configuration
c[i]
returns residue number i.
c.writeToFile(filename)
writes the configuration to a file.
c.addResidue(residue)
adds a residue to the configuration.
Class Residue:
--------------
Creation:
Residue(name, atoms)
to generate a residue with identifier 'name' and atoms
from the list 'atoms', whose elements must be instances
of class Atom (see below).
Residue(name)
to generate an empty residue with a given identifier.
Access (assuming r is an instance of Residue):
len(r)
returns the number of atoms in the residue
r[i]
returns atom number i.
r.addAtom(atom)
adds an atom to the residue.
Class Atom:
-----------
Creation:
Atom(name, position)
to generate an atom with identifier 'name' at a given position.
The position must be an instance of class Vector.
Modification methods:
moveBy(distance)
adds the given distance (instance of class Vector) to the
position.
moveTo(position)
changes the position.
Example:
--------
conf = PDBConfiguration('example.pdb')
print conf
for residue in conf:
for atom in residue:
print atom
new_conf = PDBConfiguration(conf)
new_conf.writeToFile('test.pdb')
"""
from TextFile import TextFile
from Vector import Vector
import string
class Residue:
    """A named, ordered collection of atoms.

    Attributes:
        name  -- the residue identifier, as passed to the constructor
        atoms -- the list of atom objects belonging to the residue

    len(r) gives the number of atoms and r[i] returns atom number i.
    """

    def __init__(self, name, atoms = None):
        """Create a residue called 'name'.

        'atoms' is an optional sequence of atom objects. A non-empty
        list is stored as is (shared with the caller, not copied); any
        other non-empty sequence is converted to a list so that
        addAtom() can append to it. If 'atoms' is omitted or empty,
        the residue starts out with a new empty list.
        """
        self.name = name
        if atoms:
            if not isinstance(atoms, list):
                # Tuples and other sequences have no append().
                atoms = list(atoms)
            self.atoms = atoms
        else:
            self.atoms = []

    def __len__(self):
        """Return the number of atoms in the residue."""
        return len(self.atoms)

    def __getitem__(self, item):
        """Return atom number 'item'."""
        return self.atoms[item]

    def __str__(self):
        """Return a header line followed by one line per atom."""
        lines = ['Residue ' + self.name + ':\n']
        for atom in self.atoms:
            # repr() replaces the backtick syntax, which only exists
            # in Python 2.
            lines.append(' ' + repr(atom) + '\n')
        return ''.join(lines)

    __repr__ = __str__

    def addAtom(self, atom):
        """Append 'atom' to the residue."""
        self.atoms.append(atom)
class Atom:
    """An atom, described by an identifier and a position.

    Attributes:
        name     -- the atom identifier
        position -- the current position; moveBy() requires that it
                    supports addition with the supplied distance
    """

    def __init__(self, name, position):
        """Create an atom called 'name' located at 'position'."""
        self.position = position
        self.name = name

    def __str__(self):
        """Return a one-line description of the atom."""
        pieces = ['Atom ', self.name, ' at ', str(self.position)]
        return ''.join(pieces)

    __repr__ = __str__

    def moveBy(self, distance):
        """Replace the position by the sum of position and 'distance'."""
        shifted = self.position + distance
        self.position = shifted

    def moveTo(self, position):
        """Replace the position by 'position'."""
        self.position = position
class PDBFile:
    """Reader/writer for files in PDB format.

    The file is accessed through a TextFile object. Low-level access
    works line by line (readLine, writeComment, nextResidue, writeAtom);
    high-level access works on lists of Residue objects
    (readConfiguration, readSequence, writeConfiguration).

    close() must be called after writing, because it is close() that
    writes the final 'END' line.
    """

    # Upper-case two-letter element symbols. writeAtom() uses this list
    # to decide whether an atom name starts in the first column of the
    # four-column name field (two-letter element) or in the second one.
    # NOTE(review): the test is purely textual, so a name such as 'CA'
    # is always treated as starting with a two-letter element symbol.
    two_letter_elements = ['HE', 'LI', 'BE', 'NE', 'NA', 'MG', 'AL', 'SI',
                           'CL', 'AR', 'CA', 'SC', 'TI', 'CR', 'MN', 'FE',
                           'CO', 'NI', 'CU', 'ZN', 'GA', 'GE', 'AS', 'SE',
                           'BR', 'KR', 'RB', 'SR', 'ZR', 'NB', 'MO', 'TC',
                           'RU', 'RH', 'PD', 'AG', 'CD', 'IN', 'SN', 'SB',
                           'TE', 'XE', 'CS', 'BA', 'HF', 'TA', 'RE', 'OS',
                           'IR', 'PT', 'AU', 'HG', 'TL', 'PB', 'BI', 'PO',
                           'AT', 'RN', 'FR', 'RA', 'RF', 'HA', 'SG', 'NS',
                           'HS', 'MT', 'LA', 'CE', 'PR', 'ND', 'PM', 'SM',
                           'EU', 'GD', 'TB', 'DY', 'HO', 'ER', 'TM', 'YB',
                           'LU', 'AC', 'TH', 'PA', 'NP', 'PU', 'AM', 'CM',
                           'BK', 'CF', 'ES', 'FM', 'MD', 'NO', 'LR']

    def __init__(self, filename, mode = 'r'):
        """Open 'filename' through TextFile.

        The file counts as an output file if 'mode' starts with 'w'
        or 'W'; only then does close() append the 'END' line.
        """
        self.file = TextFile(filename, mode)
        self.atom_num = 0       # serial number of the last atom written
        self.abs_res_num = 0    # number of the residue being written
        self.output = mode[0].lower() == 'w'

    def _optionalFloat(self, field):
        """Convert 'field' to a float, treating a blank field as 0."""
        field = field.strip()
        if not field:
            return 0.
        return float(field)

    def readLine(self):
        """Return the next non-blank line as a tuple.

        For 'ATOM' and 'HETATM' lines the tuple is
        (type, serial number, atom name, residue name, residue number,
        position, occupancy, temperature factor), with the position
        being a Vector. For all other lines it is (type, rest of line).
        At the end of the file ('END', '') is returned.

        Raises ValueError if a serial number, residue number or
        coordinate field of an atom line is not a valid number.
        """
        while 1:
            line = self.file.readline()
            if not line:
                return ('END', '')
            line = line.strip()
            if line:
                break
        record_type = line[:6].strip()
        rest = line[6:]
        if record_type == 'ATOM' or record_type == 'HETATM':
            # Indices are relative to 'rest', i.e. to column 7 of the
            # line. Stripping the line removes trailing blank fields,
            # so the last two numbers may be absent and default to 0.
            return (record_type,
                    int(rest[:5]),
                    rest[6:10].strip(),
                    rest[11:15].strip(),
                    int(rest[16:20]),
                    Vector(float(rest[24:32]), float(rest[32:40]),
                           float(rest[40:48])),
                    self._optionalFloat(rest[48:54]),
                    self._optionalFloat(rest[54:60]))
        else:
            return (record_type, rest)

    def readConfiguration(self):
        """Read all remaining ATOM and HETATM lines.

        Returns a list of Residue objects. A new residue is started
        whenever the residue number differs from that of the preceding
        atom line; all other line types are ignored.
        """
        conf = []
        residue = None
        resnum = None   # None = no atom seen yet; any number is valid
        while 1:
            line = self.readLine()
            if line[0] == 'END':
                break
            if line[0] == 'ATOM' or line[0] == 'HETATM':
                if residue is None or line[4] != resnum:
                    resnum = line[4]
                    residue = Residue(line[3])
                    conf.append(residue)
                residue.addAtom(Atom(line[2], line[5]))
        return conf

    def readSequence(self):
        """Return the list of residue names found in the ATOM lines.

        A residue is recorded each time the residue number exceeds the
        largest number recorded so far.
        NOTE(review): residues whose numbers do not increase (for
        example numbering that restarts) are therefore skipped --
        confirm that this is intended.
        """
        resnum = None   # None = no residue recorded yet
        sequence = []
        while 1:
            line = self.readLine()
            if line[0] == 'END':
                break
            if line[0] == 'ATOM':
                if resnum is None or line[4] > resnum:
                    resnum = line[4]
                    sequence.append(line[3])
        return sequence

    def writeComment(self, text):
        """Write each line of 'text' as a REMARK line."""
        while text:
            eol = text.find('\n')
            if eol == -1:
                eol = len(text)
            self.file.write('REMARK %s \n' % text[:eol])
            text = text[eol+1:]

    def nextResidue(self, resid):
        """Start a new residue named 'resid' for subsequent writeAtom calls."""
        self.resid = resid
        self.abs_res_num = self.abs_res_num + 1

    def writeAtom(self, name, position):
        """Write a HETATM line for an atom at 'position'.

        'position' is indexed with 0, 1 and 2 to obtain the coordinates.
        nextResidue() must have been called before the first atom is
        written. Occupancy and temperature factor are written as 0.
        """
        self.atom_num = self.atom_num + 1
        # Names not starting with a two-letter element symbol begin in
        # the second column of the name field. Four-character names
        # need all four columns and must not be shifted, otherwise
        # their last character would be cut off.
        if len(name) < 4 and name[0:2] not in self.two_letter_elements:
            name = ' ' + name
        fields = ['HETATM%5d ' % self.atom_num,
                  name.ljust(4)[0:4] + ' ',
                  self.resid.ljust(3)[0:3] + ' ',
                  # Blank chain id, residue number, then blanks up to
                  # column 30 so that x occupies columns 31-38, which
                  # is where readLine() looks for it.
                  ' %4d    ' % self.abs_res_num,
                  '%8.3f' % position[0],
                  '%8.3f' % position[1],
                  '%8.3f' % position[2],
                  '%6.2f' % 0.,
                  '%6.2f' % 0.,
                  '\n']
        self.file.write(''.join(fields))

    def writeConfiguration(self, data):
        """Write all residues in 'data' (a sequence of Residue objects)."""
        for residue in data:
            self.nextResidue(residue.name)
            for atom in residue.atoms:
                self.writeAtom(atom.name, atom.position)

    def close(self):
        """Close the file, writing the 'END' line first for output files."""
        if self.output:
            self.file.write('END\n')
        self.file.close()
class PDBConfiguration:
    """A configuration, i.e. a list of residues.

    Attributes:
        residues -- the list of residue objects

    len(c) gives the number of residues and c[i] returns residue
    number i.
    """

    def __init__(self, conf = None):
        """Create a configuration.

        'conf' can be
          - a string: the name of a PDB file from which the
            configuration is read,
          - a non-empty list of residues, which is stored as is
            (shared with the caller, not copied),
          - any other non-empty sequence of residues (for example
            another PDBConfiguration), whose elements are copied into
            a new list so that addResidue() works,
          - omitted or empty, for an empty configuration.
        """
        if isinstance(conf, str):
            pdb_file = PDBFile(conf)
            try:
                self.residues = pdb_file.readConfiguration()
            finally:
                # Release the file even if reading fails.
                pdb_file.close()
        elif conf:
            if not isinstance(conf, list):
                # Only lists provide the append() used by addResidue().
                conf = list(conf)
            self.residues = conf
        else:
            self.residues = []

    def __len__(self):
        """Return the number of residues."""
        return len(self.residues)

    def __getitem__(self, item):
        """Return residue number 'item'."""
        return self.residues[item]

    def __str__(self):
        """Return the concatenated string forms of all residues."""
        return ''.join([str(residue) for residue in self.residues])

    def writeToFile(self, filename):
        """Write the configuration to the PDB file 'filename'."""
        pdb_file = PDBFile(filename, 'w')
        try:
            pdb_file.writeConfiguration(self.residues)
        finally:
            # Always close, so the file is released even after an error.
            pdb_file.close()

    def addResidue(self, residue):
        """Append 'residue' to the configuration."""
        self.residues.append(residue)
|