|
#!/usr/local/bin/perl -w
# this takes Gaussian input file with geometry given as
# Zmat, and converts it to the XYZ format (cartesians)
# This is an educational script, hence many comments.
# The script is a filter, i.e., should be used as
# cat name.inp | g9xinp2xyz.pl > name.xyz
# where name.inp is the Gaussian input file and
# name.xyz is a file with cartesian coordinates a la XMol
# It is assumed that G9x input file has a format:
# % part (e.g., checkpoint specs)
# # keywords/links part
# empty line
# title lines
# empty line
# charge multiplicity line
# zmatrix lines
# empty line or "Variables"
# symbolic variables for Z-matrix (if ZMAT has symbolic parameters)
# empty line or "Constants"
# symbolic constants for Z-matriz (if all references were not resolved yet)
# empty line or line starting with "-"
#
# Written by Jan Labanowski in July 1997
#
$keywords = "";
while($line = ) { # read next lines of keywords (route section)
chop($line);
if($line =~ /^\s*\%/) {
next;
}
elsif($line =~ /\S/) {
$keywords .= ' ' . $line;
}
elsif(($line !~ /\S/) && ($keywords eq "")) { # if empty line, but no # yet
next;
}
else {
last;
}
}
$title = "";
while($line = ) { # read title line(s)
chop($line);
if($line =~ /\S/) {
$title .= $line . ' ';
}
else {
last;
}
}
$line = ; # this should be charge multiplicity line
chop($line);
$natoms = 0; #number of atoms found initialized
while ($line = ) { # read molecule description lines
chop($line);
$line =~ tr/a-z/A-Z/;
$line =~ s/^\s+//; # get rid of front and back spaces
$line =~ s/$\s+//;
if(($line !~ /\S/) ||
($line =~ /variab/i) ||
($line =~ /consta/i)) { # if blank line or end of Zmat
last; # exit the loop
}
if($natoms == 0) { # first atom is only an atomic symbol
$symbol[$natoms] = $line;
# no parameters necessary for atom 1
($bf[$natoms], $af[$natoms], $tf[$natoms]) = (1, 1, 1);
}
else {
# split the line into individual fields:
(@fields) = split(/\s+/, $line); # split line into fields
# assumed most standard z-matrix form, so check number of entries
if((($#fields != 2) && ($natoms == 1)) &&
(($#fields != 4) && ($natoms == 2)) &&
(($#fields != 6) && ($natoms >= 3))) {
printf STDERR "Error in Z-Matrix for atom %d\n", $natoms+1;
exit(2); # abort
}
for($i = $#fields+1; $i <8; $i++) { # fill up with empty strings
$fields[$i] = "";
}
# parse fields symbol_i j d_ij k theta_ijk l phi_ijkl
($symbol[$natoms], # atom symbol
$bondat[$natoms], # bonded atom
$blen[$natoms], # bond length
$angat[$natoms], # atom in an angle
$angle[$natoms], # angle
$torat[$natoms], # last atom in torsion angle
$tors[$natoms]) # torsion angle
= (@fields);
# initialize flags for required parameters to "NotFound" status (i.e., 0)
if($natoms == 1) {
($bf[$natoms], $af[$natoms], $tf[$natoms]) = (0, 1, 1);
}
elsif($natoms == 2) {
($bf[$natoms], $af[$natoms], $tf[$natoms]) = (0, 0, 1);
}
else {
($bf[$natoms], $af[$natoms], $tf[$natoms]) = (0, 0, 0);
}
# check if parameters are given as numbers (i.e., sign, possibly followed
# by digits, possibly followed by period, possibly followed by digits
# and containing at least one digit total. Set flag if number.
if(($blen[$natoms] =~ /^[-+]?[0-9]*\.?[0-9]*$/) &&
($blen[$natoms] =~ /[0-9]+/)) {
$bf[$natoms] = 1;
}
if(($angle[$natoms] =~ /^[-+]?[0-9]*\.?[0-9]*$/) &&
($angle[$natoms] =~ /[0-9]+/)) {
$af[$natoms] = 1;
}
if(($tors[$natoms] =~ /^[-+]?[0-9]*\.?[0-9]*$/) &&
($tors[$natoms] =~ /[0-9]+/)) {
$tf[$natoms] = 1;
}
# Atom numbers must be all digits
if(($bondat[$natoms] !~ /^[0-9]+$/) ||
(($angat[$natoms] !~ /^[0-9]+$/) && ($natoms > 1)) ||
(($torat[$natoms] !~ /^[0-9]+$/) && ($natoms > 2))) {
printf STDERR "Error in atom definition for %d\n", $i+1;
exit(2);
}
}
$natoms++;
}
# check any symbolic parameters are given in the Z-mat
$parms_given = 0;
for($i = 0; $i < $natoms; $i++) {
if(($bf[$i] == 0) ||
($af[$i] == 0) ||
($tf[$i] == 0)) {
$parms_given = 1; # symbolic parameters present
last;
}
}
if($parms_given == 1) { # read in symbolic parameters (variables) when present
while($line = ) {
$line =~ s/^\s//; # trailing blanks out!
$line =~ s/\s$//;
$line =~ tr/a-z/A-Z/; # convert to uppercase
if(($line !~ /\S/) ||
($line =~ /const/i)) { # if blank line or end of Variables
last; # exit the loop
}
($name, $value) = @fields = split(/[ =,]+/, $line); # retrieve fields
if($#fields != 1) {
die "Wrong format on Variables assignment line\n";
}
if(($value !~ /^[-+]?[0-9]*\.?[0-9]*$/) ||
($value !~ /[0-9]+/)) {
die "Wrong number format of Variables assignment line\n";
}
# substitute parameter in Z-Matrix. The symbolic parameter name is
# allowed to be preceded with a sign, so take this into account
$parameter_found = 0;
for($i = 0; $i < $natoms; $i++) {
if($bf[$i] == 0) {
if($blen[$i] =~ /^([-+]?)$name$/) {
$sign = $1 . "1.0";
$blen[$i] = $sign * $value;
$parameter_found = 1;
$bf[$i] = 1;
}
}
if($af[$i] == 0) {
if($angle[$i] =~ /^([-+]?)$name$/) {
$sign = $1 . "1.0";
$angle[$i] = $sign * $value;
$parameter_found = 1;
$af[$i] = 1;
}
}
if($tf[$i] == 0) {
if($tors[$i] =~ /^([-+]?)$name$/) {
$sign = $1 . "1.0";
$tors[$i] = $sign * $value;
$parameter_found = 1;
$tf[$i] = 1;
}
}
}
# parameter, if given, must appear at least once in Z-Matrix
if($parameter_found == 0) {
die "Parameter $name = $value does not appear in Z-Matrix\n";
}
}
}
# check if all symbolic paramemters were reslolved
$parms_given = 0;
for($i = 0; $i < $natoms; $i++) {
if(($bf[$i] == 0) ||
($af[$i] == 0) ||
($tf[$i] == 0)) {
$parms_given = 1; # symbolic parameters present
last;
}
}
# if all parameters are not resloved, read in the Constants section
if($parms_given == 1) { # read in symbolic parameters (variables) when present
while($line = ) {
$line =~ s/^\s//; # trailing blanks out!
$line =~ s/\s$//;
$line =~ tr/a-z/A-Z/; # convert to uppercase
if($line =~ /const/i) {
next;
}
if(($line !~ /\S/) ||
($line =~ /^\s*\-/)) { # if blank line or - at the front
last; # exit the loop
}
($name, $value) = @fields = split(/[ =,]+/, $line); # retrieve fields
if($#fields != 1) {
die "Wrong format of Constants assignment line\n";
}
if(($value !~ /^[-+]?[0-9]*\.?[0-9]*$/) ||
($value !~ /[0-9]+/)) {
die "Wrong number format on Constants assignment line\n";
}
# substitute parameter in Z-Matrix. The symbolic parameter name is
# allowed to be preceded with a sign, so take this into account
$parameter_found = 0;
for($i = 0; $i < $natoms; $i++) {
if($bf[$i] == 0) {
if($blen[$i] =~ /^([-+]?)$name$/) {
$sign = $1 . "1.0";
$blen[$i] = $sign * $value;
$parameter_found = 1;
$bf[$i] = 1;
}
}
if($af[$i] == 0) {
if($angle[$i] =~ /^([-+]?)$name$/) {
$sign = $1 . "1.0";
$angle[$i] = $sign * $value;
$parameter_found = 1;
$af[$i] = 1;
}
}
if($tf[$i] == 0) {
if($tors[$i] =~ /^([-+]?)$name$/) {
$sign = $1 . "1.0";
$tors[$i] = $sign * $value;
$parameter_found = 1;
$tf[$i] = 1;
}
}
}
# parameter, if given, must appear at least once in Z-Matrix
if($parameter_found == 0) {
die "Parameter $name = $value does not appear in Z-Matrix\n";
}
}
}
# check if all symbolic paramemters were reslolved
$parms_given = 0;
for($i = 0; $i < $natoms; $i++) {
if(($bf[$i] == 0) ||
($af[$i] == 0) ||
($tf[$i] == 0)) {
$parms_given = 1; # symbolic parameters present
last;
}
}
if($parms_given == 1) {
print STDERR
"The follwing symbolic parameters are not resolved in Z-matrix:\n";
for($i = 0; $i < $natoms; $i++) {
if($bf[$i] == 0) {
printf STDERR " %s for bond length for atom %d\n", $blen[$i], $i;
}
if($af[$i] == 0) {
printf STDERR " %s for angle value for atom %d\n", $angle[$i], $i;
}
if($tf[$i] == 0) {
printf STDERR " %s for torsion for atom %d\n", $tors[$i], $i;
}
}
exit(2);
}
# output cartesian coordinates in XYZ XMol format
print STDOUT "$natoms\n";
print STDOUT "$title\n";
for ($i = 0; $i < $natoms; $i++) {
oneatom($i+1, $bondat[$i], $blen[$i], $angat[$i], $angle[$i],
$torat[$i], $tors[$i]);
# note that XMol chooses quite interesting orientation
# of the molecule
printf STDOUT "%-3s %12.6f %12.6f %12.6f\n",
$symbol[$i], $Zcoor[$i+1], $Xcoor[$i+1], -$Ycoor[$i+1];
}
# -- Takes Z-mat line, i.e.,
# atomnumber, ibond, bondlen, iangle, bondangle, itors, torsangle
# and updates NONLOCAL arrays Xcoor, Ycoor, Zcoor which hold cartesian
# coordinates. It is assumed that atom numbering starts from 1
# (not from 0 as usually in C and perl)
# USES nonlocal array Xcoor, Ycoor, Zcoor where it stores coordinates
# It is assumed that angles are given in DEGREEs and bondlen in the same
# units in which cartesian coordinates will be given.
#
# ibond -- iangle
# / \
# atnum itors
#
# CAUTIONS -- contrary to the UNIX/PERL/C tradition, the
# arrays XYZ start from subscript 1 (not 0), and XYZ[0] are left unused,
# in other words, array element numbers correspond to atom numbers.
sub oneatom {
# variables are declared local, so they do not interfere with
# variables outside this subroutine
local ($atnum,
$ibond, $bondlen,
$iangle, $bondangle,
$itors, $torsangle);
local ($deg2r);
local ($rinv, $aux);
local ($xa, $ya, $za);
local ($xb, $yb, $zb);
local ($xc, $yc, $zc);
local ($xx, $yy, $zz);
$atnum = $_[0];
if($atnum == 2) {
($atnum, $ibond, $bondlen) = @_;
}
elsif ($atnum == 3) {
($atnum, $ibond, $bondlen, $iangle, $bondangle) = @_;
}
elsif ($atnum > 3) {
($atnum, $ibond, $bondlen, $iangle, $bondangle, $itors, $torsangle) = @_;
}
$deg2r = atan2(1.0, 1.0)*4.0/180.0;
# first atom in the origin of coordinate system
if($atnum == 1) {
$Xcoor[1] = 0.0;
$Ycoor[1] = 0.0;
$Zcoor[1] = 0.0;
return;
}
# second atom on Z-axis, bondlen from the first atom
if($atnum == 2) {
$Xcoor[2] = 0.0;
$Ycoor[2] = 0.0;
$Zcoor[2] = $bondlen;
return;
}
# bonded atom coordinates
$x0 = $Xcoor[$ibond];
$y0 = $Ycoor[$ibond];
$z0 = $Zcoor[$ibond];
if($atnum == 3) { # third atom placed on the Oxz plane
$Xcoor[3] = $x0+$bondlen*sin($bondangle*$deg2r);
$Ycoor[3] = 0.0;
$aux = $bondlen*cos($bondangle*$deg2r);
if($ibond == 1) {
$Zcoor[3] = $z0+$aux;
}
elsif ($ibond == 2) {
$Zcoor[3] = $z0-$aux;
}
return;
}
# vector from j-->k
$xx = $Xcoor[$iangle] - $Xcoor[$ibond];
$yy = $Ycoor[$iangle] - $Ycoor[$ibond];
$zz = $Zcoor[$iangle] - $Zcoor[$ibond];
# inverted vector length
$rinv = 1.0/sqrt($xx*$xx + $yy*$yy + $zz*$zz);
# unit vector ibond--->iangle
$xa = $xx*$rinv;
$ya = $yy*$rinv;
$za = $zz*$rinv;
# vector from ibond--->itors
$xb = $Xcoor[$itors] - $Xcoor[$ibond];
$yb = $Ycoor[$itors] - $Ycoor[$ibond];
$zb = $Zcoor[$itors] - $Zcoor[$ibond];
# unit vector ibond--->itors
$xc = $ya*$zb - $za*$yb;
$yc = $za*$xb - $xa*$zb;
$zc = $xa*$yb - $ya*$xb;
$rinv = 1.0/sqrt($xc*$xc + $yc*$yc + $zc*$zc);
$xc = $xc*$rinv;
$yc = $yc*$rinv;
$zc = $zc*$rinv;
$xb = $yc*$za - $zc*$ya;
$yb = $zc*$xa - $xc*$za;
$zb = $xc*$ya - $yc*$xa;
$zz = $bondlen*cos($bondangle*$deg2r);
$xx = $bondlen*sin($bondangle*$deg2r)*cos($torsangle*$deg2r);
$yy = $bondlen*sin($bondangle*$deg2r)*sin($torsangle*$deg2r);
$Xcoor[$atnum] = $x0 + $xa*$zz + $xb*$xx + $xc*$yy;
$Ycoor[$atnum] = $y0 + $ya*$zz + $yb*$xx + $yc*$yy;
$Zcoor[$atnum] = $z0 + $za*$zz + $zb*$xx + $zc*$yy;
return;
}
|