babel-1.6
|
Makefile,
README.1ST,
addh.c,
addh2.c,
aromatic.c,
assbnd.c,
asstypes.c,
babel.h,
bblmacs.h,
bblmast.h,
bbltyp.h,
block.c,
bndord.c,
bo.c,
buildct.c,
combine.c,
convert.c,
delatms.c,
delh2o.c,
element.lis,
filesrch.c,
fileutil.c,
gastchg.c,
gauss.hdr,
htoend.c,
int2cart.c,
intcart.c,
menus.c,
miniums.c,
molwt.c,
new.lis,
nodummy.c,
orient.c,
precip.c,
printbad.c,
progress.c,
psgvb.hdr,
quanta.lis,
rdalch.c,
rdampout.c,
rdbalst.c,
rdbgf.c,
rdboogie.c,
rdc3d.c,
rdcacao.c,
rdcadpac.c,
rdcharmm.c,
rdcsd.c,
rddock.c,
rddpdb.c,
rdelmnts.c,
rdfdat.c,
rdfeat.c,
rdfract.c,
rdg96.c,
rdgamout.c,
rdgauout.c,
rdgzmat.c,
rdhin.c,
rdinsite.c,
rdint.c,
rdirc.c,
rdisis.c,
rdm3d.c,
rdmacmod.c,
rdmacmol.c,
rdmdl.c,
rdmicro.c,
rdmm2.c,
rdmm2in.c,
rdmm3.c,
rdmolen.c,
rdmopac.c,
rdmopcrt.c,
rdpcmod.c,
rdpdb.c,
rdprep.c,
rdpsgout.c,
rdpsgvin.c,
rdquanta.c,
rdschak.c,
rdshelx.c,
rdsmiles.c,
rdspart.c,
rdspmm.c,
rdspsemi.c,
rdsybmol.c,
rdsybyl.c,
rdtypes.c,
rdunichm.c,
rdwiz.c,
rdxed.c,
rdxyz.c,
renum.c,
report.c,
rings.c,
ringutil.c,
sets.c,
smilesto.c,
spline.c,
strngutl.c,
tokenst.c,
tosmiles.c,
tree.c,
typbybo.c,
types.lis,
umslist.c,
utils.c,
vectors.c,
wralch.c,
wrbalst.c,
wrbgf.c,
wrbmin.c,
wrbox.c,
wrc3d.c,
wrcacao.c,
wrcache.c,
wrcacint.c,
wrchdrw.c,
wrcontmp.c,
wrcsr.c,
wrcssr.c,
wrdock.c,
wrdpdb.c,
wrfeat.c,
wrfh.c,
wrg96.c,
wrgamess.c,
wrgau.c,
wrgaucrt.c,
wrhin.c,
wricon.c,
wrint.c,
wrisis.c,
wrm3d.c,
wrmaccs.c,
wrmacmod.c,
wrmcmol.c,
wrmdl.c,
wrmicro.c,
wrmimic.c,
wrmiv.c,
wrmm2.c,
wrmm3.c,
wrmopac.c,
wrpcmod.c,
wrpdb.c,
wrpsgv.c,
wrpsgvz.c,
wrsmiles.c,
wrspart.c,
wrsybmol.c,
wrsybyl.c,
wrtinker.c,
wrtorlst.c,
wrunichm.c,
wrwiz.c,
wrxed.c,
wrxyz.c
|
|
|
/*
tosmiles.c
converts connection tables to unique SMILES strings
Simon Kilvington, University of Southampton, 1995
*/
#include "bbltyp.h"
/*
contabtosmiles
converts the given fragment into a unique SMILES string
uses the algorithm described by the Weiningers in J.Chem.Inf.Comput.Sci. 29 (1989), 97-101
returns a ptr to the string which needs to be free'd when you've finished with it
returns NULL if any problems
it will explicitly include any H atoms in frag in the resulting SMILES, so it's best to remove all H atoms from frag before calling this
*/
char *
contabtosmiles(smilescontab_t *frag)
{
char *smiles;
int *rank, *primesum, *tmprank, *primelist;
int a, b, c, n, bto, nranks, lowest;
int nbonds, nprimes;
int prime, same;
smilesatom_t *aptr;
block_ptr blk;
int i;
if(!block_alloc(&blk, "iiii", &rank, frag->natoms, &primesum, frag->natoms, &tmprank, frag->natoms, &primelist, frag->natoms))
return NULL;
/* generate an array of the first "frag->natoms" prime numbers */
nprimes = 0;
for(n=2; nprimesnatoms; n++)
{
prime = TRUE;
for(c=2; c<=(n/2) && prime; c++)
prime = ((n % c) != 0);
if(prime)
primelist[nprimes++] = n;
}
/* determine the initial graph invariants for each atom, store them in primesum */
for(a=0; anatoms; a++)
{
aptr = &frag->atom[a];
/* no of connections */
nbonds = nbondsto(aptr);
primesum[a] = nbonds * 100000;
/* no of non-H bonds (frag should contain no H atoms) */
primesum[a] += nbonds * 10000;
#if 0 /*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/* atomic no */
primesum[a] += md_atomicnumber(aptr->symbol) * 100;
/* sign and absolute value of charge */
primesum[a] += md_chargeonatom(aptr) * 10;
/* no of attached H's */
primesum[a] += (md_maxvalency(aptr->symbol) - nbonds);
#endif /*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
}
do
{
/* rank each atom based on primesum values (initially these are the invariants, on subsequent loops they are the tie breaker ranks) */
sortranks(frag->natoms, primesum, rank);
do
{
/* generate the product of the "corresponding primes" of each atoms neighbours */
for(a=0; anatoms; a++)
{
aptr = &frag->atom[a];
primesum[a] = 1;
for(b=0; bbondedto[b];
primesum[a] *= (bto != -1) ? primelist[rank[bto]-1] : 1; /* -1 as array indices start at 0 */
}
}
/* resolve ties in "rank" list using the "primesum" values, put the new ranks in "tmprank" */
nranks = resolveties(frag->natoms, rank, primesum, tmprank);
/* see if the ranks are the same as last time */
same = TRUE;
for(a=0; anatoms && same; a++)
same = (rank[a] == tmprank[a]);
/* move the sorted data back into rank array */
memcpy(rank, tmprank, frag->natoms * sizeof(int));
}
while(!same);
/* see if we have any tied atoms */
if(nranks != frag->natoms) /* break ties */
{
/* find lowest tied rank */
lowest = 0;
do
{
lowest++;
c = 0;
for(a=0; anatoms; a++)
c += (rank[a] == lowest);
}
while(c == 1);
/* double all the ranks, store them in primesum array */
for(a=0; anatoms; a++)
primesum[a] = rank[a] * 2;
/* reduce the first lowest tied one by 1 */
for(a=0; primesum[a] != (lowest * 2); a++)
;
primesum[a]--;
}
}
while(nranks != frag->natoms);
/* frag is now cannonicalised, lets get on with generating the smiles string */
smiles = generatesmilesstring(frag, rank);
/* clean up */
block_free(&blk);
return smiles;
}
/*
sortranks
generates a list of ranks in "rank" based on the numbers in "value" (ie lowest value is ranked 1 etc)
returns number of different ranks
the value array will be corrupted, it'll end up as all -1's
*/
int
sortranks(int natoms, int *value, int *rank)
{
int a, sort, lowest;
sort = 1;
do
{
/* find the lowest value that isnt -1, our tag for values that have already been sorted out (ho ho!) */
lowest = -1; /* not found yet */
for(a=0; a 0)
{
for(a=0; a 0);
}
return nranks-1;
}
/*
generatesmilesstring
generates a smiles string for the fragment based on the given atomic ranks
*/
typedef struct /* an array of these holds the data about each atom generated by the first pass thru the fragment */
{ /* the second pass uses these to write out the smiles string */
char symbol[2]; /* atom symbol */
int aromatic; /* TRUE if ANY of the bonds to it are aromatic */
int ring[md_MAXBONDS]; /* ring closure numbers associated with this atom */
int rbracket; /* does this atom end a branch */
int lbracket; /* does the NEXT atom start a branch */
int endfrag; /* do we need a "." after this atom to separate two structures */
smilesbond_t bond; /* order of bond to next atom */
} _smileyatomdata;
char *
generatesmilesstring(smilescontab_t *frag, int *rank)
{
char *smiles;
char tmpstr[8];
int *bracketstack, *ringstack, *neworder; /* neworder[i] is the the index of frag->atom[i] in the atomdata array */
int bsptr, rsptr;
int a, b, bto, f, lowest, tmp, smileslen;
int smipos, atom, nextatom, ringatom, newring, closeatom, maxbonds, natoms, nrings;
smilesbond_t order;
int *visited;
block_ptr blk;
_smileyatomdata *atomdata, initatomdata;
static const char bondchr[] = "-=#:";
natoms = frag->natoms;
if(!(atomdata = malloc(natoms * sizeof(_smileyatomdata))))
return NULL;
if(!block_alloc(&blk, "iiiB", &bracketstack, natoms, &ringstack, natoms*md_MAXBONDS, &neworder, natoms, &visited, natoms))
{
free(atomdata);
return NULL;
}
/* set up the initial (blank) atomdata entry */
initatomdata.symbol[0] = '\0';
initatomdata.symbol[1] = '\0';
initatomdata.aromatic = FALSE;
for(b=0; bnatoms; a++) /* haven't visited any atoms yet */
visited[a] = FALSE;
natoms = 0; /* number of atoms we have in our smiley atom data array so far */
/* find the atom with rank 1 */
atom = lowestunvisitedatom(frag->natoms, rank, visited);
/* first pass, determine which order the atoms should appear in, and store some attributes of each one - start with the one we have just found */
while(natoms < frag->natoms && atom != -1)
{
/* set up the atomdata entry for atom number "atom" */
atomdata[natoms] = initatomdata;
strncpy(atomdata[natoms].symbol, frag->atom[atom].symbol, 2);
atomdata[natoms].aromatic = hasaromaticbonds(&frag->atom[atom]);
/* remember we have visited it, and store its index into the atomdata array */
visited[atom] = TRUE;
neworder[atom] = natoms;
/* if there are no unvisited atoms attached to this atom */
if(nunvisitedbondsto(&frag->atom[atom], visited) == 0)
{
/* jump back to the atom on top of the branch stack if there is one */
if(bsptr > 0)
{
/* add an end of branch bracket */
atomdata[natoms].rbracket = TRUE;
/* go back to the start of the branch */
bsptr --;
atom = bracketstack[bsptr];
}
else
{
/* is it time to start a new structure, or have we reached the end of the molecule */
if(natoms != frag->natoms-1)
{
atomdata[natoms].endfrag = TRUE;
atom = lowestunvisitedatom(frag->natoms, rank, visited);
}
/* increase the number of atoms we have seen, ready for next time */
natoms ++;
continue;
}
}
/* there are unvisited atoms attached to this one */
/*!!!!!!!! what if the atom we pulled off the branch stack has no unvisited atoms attached to it? !!!!!!!!!!*/
/*!!!!!!!! this can only happen if it's the last atom, or the end of a substructure !!!!!!!!!!*/
nextatom=-1;
/* find the lowest ranked, unvisited, atom attached to "atom", this is the next one to visit */
lowest = frag->natoms+1;
for(b=0; batom[atom].bondedto[b];
if(bto != md_NOBOND && !visited[bto] && rank[bto] < lowest)
{
lowest = rank[bto];
nextatom = bto;
}
}
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
if(nextatom==-1)
{
printf("natoms=%d\tatom=%d\tnext=-1\n", natoms, atom+1);
atom=-1;
continue;
}
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/* count how many bonds from this atom form rings */
nrings = 0;
/* see if "nextatom" forms a ring, ie can we follow "nextatom" and get back to "atom" again */
ringatom = findunvisitedring(frag, atom, nextatom, visited);
/* did we find a ring */
if(ringatom != -1)
{
/* is the bond order between "atom" and "ringatom" double or triple */
order = getbondorder(&frag->atom[atom], ringatom);
if(order == SMILESBOND_Double || order == SMILESBOND_Triple)
{
/* follow "ringatom" instead of "nextatom" so we dont have to close the ring with a multiple bond */
tmp = nextatom;
nextatom = ringatom;
ringatom = tmp;
}
/* store the two atoms on the ring closure stack */
ringstack[rsptr++] = atom;
ringstack[rsptr++] = ringatom;
/* increase our ring count */
nrings ++;
/* see if any of the other unvisited atoms connected to this one also form a ring */
for(b=0; batom[atom].bondedto[b];
if(bto == md_NOBOND || bto == nextatom || bto == ringatom || visited[bto])
continue;
/* does atom "bto" form a ring (ie can we trace a path from it back to "atom") */
newring = findunvisitedring(frag, atom, bto, visited);
if(newring != -1)
{
/* add the two atoms to the ring stack */
ringstack[rsptr++] = atom;
ringstack[rsptr++] = bto;
/* count how many bonds form rings */
nrings ++;
}
}
}
/* see if this is a branch point, ie more than 1 unvisited bonds to it that don't form rings */
maxbonds = 1 + nrings;
if(nunvisitedbondsto(&frag->atom[atom], visited) > maxbonds)
{
atomdata[natoms].lbracket = TRUE;
/* store this atom on the branch stack */
bracketstack[bsptr] = atom;
bsptr ++;
}
/* store the bond order between this atom and the next */
atomdata[natoms].bond = getbondorder(&frag->atom[atom], nextatom);
/* move onto the next atom */
atom = nextatom;
/* increase the number of atoms we have seen, ready for next time */
natoms ++;
}
/* did anything go wrong */
if(natoms != frag->natoms)
{
free(atomdata);
block_free(&blk);
return NULL;
}
/* pull the ring closure atoms off the ringstack and convert them into ring closure numbers */
nrings = rsptr / 2;
while(nrings > 0)
{
ringatom = neworder[ringstack[--rsptr]]; /* convert the frag->atom index into an atomdata index */
closeatom = neworder[ringstack[--rsptr]];
/* mark the ring closure in atomdata[] */
for(f=0; atomdata[ringatom].ring[f] != -1; f++) /* find next free entry */
;
atomdata[ringatom].ring[f] = nrings;
for(f=0; atomdata[closeatom].ring[f] != -1; f++)
;
atomdata[closeatom].ring[f] = nrings;
nrings--;
};
/* work out the max length "smiles" needs to be */
smileslen = 1; /* don't forget the terminator */
for(a=0; anatoms; a++)
{
/* the atom name */
smileslen += addsmilesatom(tmpstr, atomdata[a].symbol, atomdata[a].aromatic);
/* ring closure numbers */
for(b=md_MAXBONDS-1; b>=0; b--)
{
nrings = atomdata[a].ring[b];
if(nrings != -1)
smileslen += (nrings > 9) ? 3 : 1; /* do we need a "%xx" or just a "x" */
}
/* right brackets */
if(atomdata[a].rbracket)
smileslen ++;
/* left brackets */
if(atomdata[a].lbracket)
smileslen ++;
/* bond symbols - we never need to write aromatic bond symbols because the bound atoms name's will be in lower case */
order = atomdata[a].bond;
if(order == SMILESBOND_Double || order == SMILESBOND_Triple)
smileslen ++;
/* substructure separators */
if(atomdata[a].endfrag)
smileslen ++;
}
/* alloc some space for "smiles" */
if(!(smiles = malloc(smileslen)))
{
free(atomdata);
block_free(&blk);
return NULL;
}
/* second pass - generate the smiles string using the data we have just set up */
smipos = 0; /* smipos is index of terminator in smiles string */
for(a=0; anatoms; a++)
{
/* print the atom name */
smipos += addsmilesatom(&smiles[smipos], atomdata[a].symbol, atomdata[a].aromatic);
/* ring numbers are stored in reverse numerical order, so print them out backwards */
for(b=md_MAXBONDS-1; b>=0; b--)
{
nrings = atomdata[a].ring[b];
if(nrings != -1)
smipos += addsmilesring(&smiles[smipos], nrings);
}
/* does this atom end a branch */
if(atomdata[a].rbracket)
smiles[smipos++] = ')';
/* does the atom end a substructure */
if(atomdata[a].endfrag)
smiles[smipos++] = '.';
/* does the next atom start a branch */
if(atomdata[a].lbracket)
smiles[smipos++] = '(';
/* we never need to write aromatic bond symbols because the bound atoms name's will be in lower case */
order = atomdata[a].bond;
if(order == SMILESBOND_Double || order == SMILESBOND_Triple)
smiles[smipos++] = bondchr[order - SMILESBOND_Single];
}
smiles[smipos] = '\0';
free(atomdata);
block_free(&blk);
return smiles;
}
/*
addsmilesatom
adds the smiles symbol(s) (in lower case if aromatic is TRUE) to the string
returns the number of characters added
*/
#define NINSUBSET 10
int
addsmilesatom(char *string, char *symbol, int aromatic)
{
int n, s;
static const char *organicsubset[NINSUBSET] = { "B", "C", "N", "O", "P", "S", "F", "Cl", "Br", "I" };
/* if the symbol is not in the "organic subset" we need to enclose it in square brackets */
for(s=0; s < NINSUBSET && strncmp(symbol, organicsubset[s], 2) != 0; s++)
;
/* count how many chars we add to the string */
n = 0;
if(s == NINSUBSET)
string[n++] = '[';
string[n++] = (aromatic) ? tolower(symbol[0]) : symbol[0];
/* is it a one letter symbol */
if(symbol[1] != '\0')
string[n++] = (aromatic) ? tolower(symbol[1]) : symbol[1];
if(s == NINSUBSET)
string[n++] = ']';
return n;
}
#undef NINSUBSET
/*
addsmilesring
adds a ring closure digit (or %xx if ringnum > 9) to the given string
returns the number of characters it added
*/
int
addsmilesring(char *string, int ringnum)
{
int n;
if(ringnum < 10)
{
*string = '0' + ringnum;
n = 1;
}
else
{
n = (int)sprintf(string, "%%%d", ringnum);
}
return n;
}
/*
lowestunvisitedatom
returns the index of the atom which has "visited[]" set to FALSE and the lowest value in "rank[]"
*/
int
lowestunvisitedatom(int natoms, int *rank, int *visited)
{
int a, lowestatom, lowestrank;
lowestatom = -1;
for(a=0; anatoms * sizeof(int))))
return -1;
ring = FALSE;
for(b=0; batom[start].bondedto[b];
if(close != md_NOBOND && close != branch && !visited[close])
{
/* take a copy of the visited array as we will need to change it */
memcpy(done, visited, frag->natoms * sizeof(int));
/* can we trace a path from "start", along the bond to "close", and end up at "branch" ? */
ring = dotheymeet(frag, start, close, branch, done);
}
}
free(done);
return ring ? close : -1;
}
/*
dotheymeet
recursively searches from "next" (but not along the bond to "start") for "target"
only visits an atom i if visited[i]==FALSE
all atoms that are visited will have visited[i] set to TRUE
returns TRUE if it was found
*/
int
dotheymeet(smilescontab_t *frag, int start, int next, int target, int *visited)
{
int b, bto;
int meet;
/*!!!!! could we store "visited" for each atom connected to "next" here, then restore it before we return? !!!!!*/
meet = FALSE;
for(b=0; batom[next].bondedto[b];
if(bto != md_NOBOND && bto != start && !visited[bto])
{
visited[bto] = TRUE;
meet = (bto == target) ? TRUE : dotheymeet(frag, next, bto, target, visited);
}
}
return meet;
}
/*
nunvisitedbondsto
returns the number of bonds to the given atom
bonds are only counted if visited[i] == FALSE where i is the index of the atom bound to this one
*/
int
nunvisitedbondsto(smilesatom_t *aptr, int *visited)
{
int b, n, bto;
n=0;
for(b=0; bbondedto[b];
n += ((bto != md_NOBOND) && !visited[bto]);
}
return n;
}
/*
nbondsto
returns the total number of atoms bonded to this one
*/
int
nbondsto(smilesatom_t *aptr)
{
int b, nbonds;
nbonds = 0;
for(b=0; bbondedto[b] != md_NOBOND);
return nbonds;
}
/*
getbondorder
returns the bond order of the bond from the atom pointed to by "aptr" to atom number "atom"
if they are not bonded it returns SMILESBOND_NoBond
*/
smilesbond_t
getbondorder(smilesatom_t *aptr, int atom)
{
int b;
smilesbond_t order;
order = SMILESBOND_NoBond;
for(b=0; bbondedto[b] == atom)
order = aptr->bondtype[b];
}
return order;
}
/*
hasaromaticbonds
returns TRUE if there are one or more aromatic bonds to the given atom
*/
int
hasaromaticbonds(smilesatom_t *aptr)
{
int b;
int has = FALSE;
for(b=0; bbondedto[b] != md_NOBOND && aptr->bondtype[b] == SMILESBOND_Aromatic);
return has;
}
|