#!/usr/local/bin/nawk -f
# generate pdb ATOM records from car file
# rename atom names if necessary
# known chemical species: H N C O S L (lone pair)
# iupac(atomname): return 1 if atomname is accepted as an IUPAC-conformant
# atom name, 0 otherwise.
# Accepted are the literal name "OXT" and names consisting of one species
# letter (H C N O S L), optionally followed by one of the letters
# A B G D E Z H, which in turn may be followed by a single digit 1 or 2.
function iupac(atomname)
{
	if (atomname == "OXT")
		return 1
	return (atomname ~ /^[HCNOSL]([ABGDEZH][12]?)?$/) ? 1 : 0
}
# check(atomname): return 1 if atomname is exactly three characters long and
# its first character is one of the species letters H C N O S L; return 0
# otherwise.  The length is tested first, so match() is only called for
# three-character names.
function check(atomname)
{
	if (length(atomname) != 3)
		return 0
	if (match(atomname,"[HCNOSL]") != 1)
		return 0
	return 1
}
# species(atomname): return the first character of atomname that is one of
# the known species letters H C N O S L.
# If atomname contains none of these letters, a diagnostic is written to
# standard error and the program exits with status 1.
# The second parameter i is only a local scratch variable; callers pass
# atomname alone.
function species(atomname,    i)
{
	i = match(atomname, "[HCNOSL]")
	if (i == 0) {
		# Send the diagnostic to stderr so it cannot be mixed into the
		# PDB records written on stdout.
		printf("car2pdbatom.awk: %s is not of species H C N O S L\n", atomname) | "cat 1>&2"
		close("cat 1>&2")
		exit(1)
	}
	return(substr(atomname, i, 1))
}
# Start-up: usage check, definition of the output record layout and
# initialisation of the atom serial counter.
BEGIN{
	# NOTE(review): this relies on the awk in use setting FILENAME to "-"
	# before any input is read when no file operand is given.  POSIX leaves
	# FILENAME unspecified inside BEGIN, so on other implementations this
	# test may never fire -- confirm against the target nawk.
	if ( FILENAME == "-" ) {
		print "usage:car2pdbatom.awk "
		exit
	}
	# Fixed-column PDB ATOM record layout:
	#   cols  1- 6   record name "ATOM  "
	#   cols  7-11   atom number            (%5i)
	#   cols 13-16   atom name              (%4s)
	#   cols 18-20   residue name           (%3s)
	#   cols 23-26   residue number         (%4s)
	#   cols 31-54   x, y, z coordinates    (3 x %8.3f)
	#   cols 55-60   occupancy              (%6.2f)
	#   cols 61-66   temperature factor     (%6.2f)
	# Columns 12, 17, 21-22 and 27-30 are left blank.
	output_format="ATOM  %5i %4s %3s  %4s    %8.3f%8.3f%8.3f%6.2f%6.2f \n"
	# first atom number minus one
	atomnumber=0
}
# is_coordinate(field): return 1 if field is a decimal number with at least
# one digit on each side of a mandatory decimal point, an optional leading
# minus sign and an optional "E" exponent (e.g. -1.25 or 3.0E-02); 0 otherwise.
function is_coordinate(field)
{
	return (field ~ /^-?[0-9][0-9]*\.[0-9][0-9]*(E-?[0-9][0-9]*)?$/) ? 1 : 0
}
# Atom records: every line with exactly 9 fields whose fields 2, 3 and 4 are
# numbers.  Field 1 is the atom name, fields 2-4 the x/y/z coordinates,
# field 5 the residue name and field 6 the residue number.
(NF == 9) && is_coordinate($2) && is_coordinate($3) && is_coordinate($4) {
	atomnumber++
	atomname = $1
	# truncate residue name to 3 characters
	residuename = substr($5, 1, 3)
	residuenumber = $6
	x = $2 + 0.0
	y = $3 + 0.0
	z = $4 + 0.0
	# occupancy and temperature factor are constants for every atom
	occupancy = 1.00
	temperature = 0.00
	# atom count within residue, keyed by residue number; an unset entry
	# counts as 0, so the first atom of a residue gets 1
	count[residuenumber]++
	# Reformat the atom name only if it neither conforms to IUPAC nor is a
	# three-character name starting with its species letter: the new name
	# is the species letter followed by the atom's count within its residue.
	if ( ! iupac(atomname) && ! check(atomname) )
		atomname = species(atomname) count[residuenumber]
	# Left-justify short names to 3 characters so that, inside the
	# right-aligned 4-column name field, they start in its second column.
	# Names of 3 or more characters are left untouched.
	if (length(atomname) < 3)
		atomname = sprintf("%-3s", atomname)
	# output pdb atom record
	printf(output_format, atomnumber, atomname, residuename, residuenumber, x, y, z, occupancy, temperature)
}
# After the last input line: emit the TER record that follows the ATOM
# records in the output.
END{
	print "TER"
}