CCL Home Page
Up Directory CCL car2pdbatom.awk
#!/usr/local/bin/nawk -f
# generate pdb ATOM records from car file

# rename atom names if necessary
# known chemical species: H N C O S L (lone pair)

# Decide whether an atom name can be written to the pdb file unchanged
# because it already follows the IUPAC naming pattern.
#   atomname  atom name as read from the car file
# Returns 1 when the name is exactly "OXT", or when it consists of one
# species letter (H C N O S L), optionally followed by one of the letters
# A B G D E Z H and then optionally by the digit 1 or 2.
# Returns 0 for every other name.
function iupac(atomname)
{
	# the terminal oxygen is the only accepted name outside the pattern
	if ( atomname == "OXT" )
		return(1)
	# a regular expression match evaluates to 1 or 0
	return(atomname ~ /^[HCNOSL]([ABGDEZH][12]?)?$/)
}

# Decide whether a non-IUPAC atom name can still be kept as it is.
#   atomname  atom name as read from the car file
# Returns 1 when the name is exactly three characters long and its first
# character is one of the species letters H C N O S L, otherwise 0.
# (Such a name is later printed right-justified in a 4 column field, so
# the first two columns of that field are a blank and the species letter.)
function check(atomname)
{
	# anything that is not three characters long is rejected outright
	if ( length(atomname) != 3 )
		return(0)
	# match() returns the position of the leftmost species letter;
	# position 1 means the name starts with one
	return(match(atomname,"[HCNOSL]") == 1)
}

# Get the atomic species from an atom name.
#   atomname  atom name as read from the car file
#   i         local scratch variable (not passed by callers)
# Returns the first character of atomname that is one of the species
# letters H C N O S L.
# If the name contains none of these letters, a diagnostic is written to
# standard error and the program exits with status 1.
function species(atomname,   i)
{
	i=match(atomname,"[HCNOSL]")
	if (i == 0) {
		# The diagnostic goes to stderr so that it cannot end up in the
		# middle of the pdb records written to stdout.  Piping into
		# "cat 1>&2" is used because it does not depend on the awk
		# implementation providing a /dev/stderr special file.
		printf("car2pdbatom.awk: %s is not of species H C N O S L\n",atomname) | "cat 1>&2"
		# close the pipe so the message is flushed before exiting
		close("cat 1>&2")
		exit(1)
	}
	return(substr(atomname,i,1))
}

BEGIN{
	# Print a usage line and stop when the input file name is "-".
	# NOTE(review): FILENAME is normally not yet set while BEGIN runs,
	# so this test is not expected to fire -- confirm the intent.
	if ( FILENAME == "-" ) {
		print "usage:car2pdbatom.awk "
		exit
	}

	# Layout of one pdb ATOM record, in the order of the printf arguments:
	#   %5i     atom number
	#   %4s     atom name
	#   %3s     residue name
	#   %4s     residue number
	#   %8.3f   x, y and z coordinate (three fields)
	#   %6.2f   occupancy
	#   %6.2f   temperature factor
	output_format="ATOM  %5i %4s %3s  %4s    %8.3f%8.3f%8.3f%6.2f%6.2f    \n"

	# The counter is incremented before each record is printed, so it
	# starts at the first atom number minus one.
	atomnumber=0
}

# Recognise a number the way it is written in the coordinate columns:
# optional minus sign, digits, a decimal point, digits, and optionally
# an exponent introduced by E with an optional minus sign.
#   field  text of one input field
# Returns 1 if the field has that form, otherwise 0.
function carnumber(field)
{
	return(field ~ /^-?[0-9][0-9]*\.[0-9][0-9]*(E-?[0-9][0-9]*)?$/)
}

# An atom record is a line of exactly 9 fields whose fields 2, 3 and 4
# are numbers.  Every such line produces one pdb ATOM record.
NF == 9 && carnumber($2) && carnumber($3) && carnumber($4) {
	# serial number of this atom in the output
	atomnumber++

	# coordinates, forced to numeric values
	x=$2 + 0.0
	y=$3 + 0.0
	z=$4 + 0.0

	# the car file supplies neither of these, so fixed values are written
	occupancy=1.00
	temperature=0.00

	# residue name is cut down to the 3 characters the pdb field holds
	residuename = substr($5,1,3)
	residuenumber=$6

	# position of this atom within its residue; an unseen residue number
	# starts from an unset entry, which ++ turns into 1
	count[residuenumber]++

	# The atom name is kept when it conforms to IUPAC or when it is a
	# 3 character name starting with the species letter.  Otherwise it is
	# replaced by the species letter followed by the in-residue count.
	atomname=$1
	if ( ! ( iupac(atomname) || check(atomname) ) )
		atomname = species(atomname) count[residuenumber]

	# names of 1 or 2 characters are blank-padded on the right to 3
	pad = 3-length(atomname)
	if (pad == 1 || pad == 2)
		atomname = sprintf("%-3s",atomname)

	# output pdb atom record
	printf(output_format,atomnumber,atomname,residuename,residuenumber,x,y,z,occupancy,temperature)
}
# After the last input line, close the list of ATOM records with a
# TER record.
# NOTE(review): awk also runs END after an exit statement, so TER is
# presumably printed on the usage and error exit paths as well -- confirm
# that this is wanted.
END{
	print "TER"
}

Modified: Wed Nov 13 17:00:00 1996 GMT
Page accessed 378 times since Fri Apr 12 01:14:40 2002 GMT