babel-1.6
|
Makefile,
README.1ST,
addh.c,
addh2.c,
aromatic.c,
assbnd.c,
asstypes.c,
babel.h,
bblmacs.h,
bblmast.h,
bbltyp.h,
block.c,
bndord.c,
bo.c,
buildct.c,
combine.c,
convert.c,
delatms.c,
delh2o.c,
element.lis,
filesrch.c,
fileutil.c,
gastchg.c,
gauss.hdr,
htoend.c,
int2cart.c,
intcart.c,
menus.c,
miniums.c,
molwt.c,
new.lis,
nodummy.c,
orient.c,
precip.c,
printbad.c,
progress.c,
psgvb.hdr,
quanta.lis,
rdalch.c,
rdampout.c,
rdbalst.c,
rdbgf.c,
rdboogie.c,
rdc3d.c,
rdcacao.c,
rdcadpac.c,
rdcharmm.c,
rdcsd.c,
rddock.c,
rddpdb.c,
rdelmnts.c,
rdfdat.c,
rdfeat.c,
rdfract.c,
rdg96.c,
rdgamout.c,
rdgauout.c,
rdgzmat.c,
rdhin.c,
rdinsite.c,
rdint.c,
rdirc.c,
rdisis.c,
rdm3d.c,
rdmacmod.c,
rdmacmol.c,
rdmdl.c,
rdmicro.c,
rdmm2.c,
rdmm2in.c,
rdmm3.c,
rdmolen.c,
rdmopac.c,
rdmopcrt.c,
rdpcmod.c,
rdpdb.c,
rdprep.c,
rdpsgout.c,
rdpsgvin.c,
rdquanta.c,
rdschak.c,
rdshelx.c,
rdsmiles.c,
rdspart.c,
rdspmm.c,
rdspsemi.c,
rdsybmol.c,
rdsybyl.c,
rdtypes.c,
rdunichm.c,
rdwiz.c,
rdxed.c,
rdxyz.c,
renum.c,
report.c,
rings.c,
ringutil.c,
sets.c,
smilesto.c,
spline.c,
strngutl.c,
tokenst.c,
tosmiles.c,
tree.c,
typbybo.c,
types.lis,
umslist.c,
utils.c,
vectors.c,
wralch.c,
wrbalst.c,
wrbgf.c,
wrbmin.c,
wrbox.c,
wrc3d.c,
wrcacao.c,
wrcache.c,
wrcacint.c,
wrchdrw.c,
wrcontmp.c,
wrcsr.c,
wrcssr.c,
wrdock.c,
wrdpdb.c,
wrfeat.c,
wrfh.c,
wrg96.c,
wrgamess.c,
wrgau.c,
wrgaucrt.c,
wrhin.c,
wricon.c,
wrint.c,
wrisis.c,
wrm3d.c,
wrmaccs.c,
wrmacmod.c,
wrmcmol.c,
wrmdl.c,
wrmicro.c,
wrmimic.c,
wrmiv.c,
wrmm2.c,
wrmm3.c,
wrmopac.c,
wrpcmod.c,
wrpdb.c,
wrpsgv.c,
wrpsgvz.c,
wrsmiles.c,
wrspart.c,
wrsybmol.c,
wrsybyl.c,
wrtinker.c,
wrtorlst.c,
wrunichm.c,
wrwiz.c,
wrxed.c,
wrxyz.c
|
|
|
/*****
This file is part of the Babel Program
Copyright (C) 1992-96 W. Patrick Walters and Matthew T. Stahl
All Rights Reserved
All Rights Reserved
All Rights Reserved
For more information please contact :
babel@mercury.aichem.arizona.edu
--------------------------------------------------------------------------------
FILE : rdpdb.c
AUTHOR(S) : Pat Walters
DATE : 10-92
PURPOSE : Routines to read a Brookhave Protien Data Bank file
******/
#include "bbltyp.h"
#define DELIMS "\t\n "
#define PDBATOM 0
#define PDBHETATM 1
#include
#include
#include
#include
int
read_pdb(FILE *file1, ums_type *mol)
{
char pdb_line[BUFF_SIZE];
int i = 0;
int result;
char temp_char;
long int pos;
int chain_num = 1;
char res_num_str[5];
char the_atm_id[5],the_res_name[5];
char the_serial_num[6];
#ifdef DOTIME
struct timeval s_val;
double startTime, endTime;
#endif
pos = ftell(file1);
#ifdef DOTIME
gettimeofday(&s_val, 0);
startTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
#endif
while ((fgets(pdb_line,sizeof(pdb_line), file1) != NULL) && NOTEQn(pdb_line,"END",3))
{
if (EQn(pdb_line,"ATOM",4) || EQn(pdb_line,"HETATM",6))
{
i++;
}
}
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Getting Atoms: %lf\n", endTime - startTime);
startTime = endTime;
#endif
fseek(file1,pos,0);
Atoms = i;
initialize_ums(&mol);
initialize_residues(&mol);
ShowProgress(Atoms,"Reading Atoms");
i = MIN_ATOM;
while ((fgets(pdb_line,sizeof(pdb_line), file1) != NULL) && NOTEQn(pdb_line,"END",3))
{
if (EQn(pdb_line,"TER",3))
chain_num++;
if (EQn(pdb_line,"ATOM",4) || EQn(pdb_line,"HETATM",6))
{
UpdateProgress();
parse_atom_record(&pdb_line[6], i, mol);
if (pdb_line[0] == 'A')
stringToType(PDBATOM, i, mol);
else
stringToType(PDBHETATM, i, mol);
ChainNum(i) = chain_num;
i ++;
}
}
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Reading Atoms: %lf\n", endTime - startTime);
startTime = endTime;
#endif
/*
* Make sure Valence is zeroed out
*/
Bonds = 0;
for (i = MIN_ATOM; i <= Atoms; i++)
{
Valence(i) = 0;
}
fseek(file1,pos,0);
process_connect_records(file1,mol);
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Reading CONECT: %lf\n", endTime - startTime);
startTime = endTime;
#endif
result = assign_radii(mol);
if (!result)
{
release_ums(mol);
return(FALSE);
}
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Assigning Radii: %lf\n", endTime - startTime);
startTime = endTime;
#endif
if ((mol->control) && (NOTEQ(InputKeywords,"CONECT_ONLY")))
{
result = assign_pdb_bonds(mol);
if (!result)
{
release_ums(mol);
return(FALSE);
}
}
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Assigning PDB bonds: %lf\n", endTime - startTime);
startTime = endTime;
#endif
qsort(mol->connections,Bonds,sizeof(connect_type),QSORT_PROTO sort_connections);
dissect_connection_table(mol);
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Dissect Connection Table: %lf\n", endTime - startTime);
startTime = endTime;
#endif
if ((mol->control) && (NOTEQ(InputKeywords,"CONECT_ONLY")))
check_bonds(mol);
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Check Bonds: %lf\n", endTime - startTime);
startTime = endTime;
#endif
result = assign_types(mol);
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Assigning Types: %lf\n", endTime - startTime);
startTime = endTime;
#endif
if (!result)
{
release_ums(mol);
return(FALSE);
}
assign_bond_order(mol);
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Assigning Bond Order: %lf\n", endTime - startTime);
startTime = endTime;
#endif
result = build_connection_table(mol);
#ifdef DOTIME
gettimeofday(&s_val, 0);
endTime = (double) s_val.tv_sec + 0.000001*s_val.tv_usec;
printf("Building Connection Table: %lf\n", endTime - startTime);
startTime = endTime;
#endif
if (!result)
{
release_ums(mol);
return(FALSE);
}
qsort(mol->connections,Bonds,sizeof(connect_type),QSORT_PROTO sort_connections);
dissect_connection_table(mol);
BO(0,0) = -1;
return(TRUE);
}
void fix_A_type(char *type, char *id, char *res_type)
{
/* Check for ASP, ASN and ASX */
/* Check for GLU, GLN and GLX */
if (EQn(id,"AD1",3) && (EQn(res_type,"AS",2) || EQn(res_type,"N",1)))
strcpy(type,"O");
else
if (EQn(id,"AD2",3) && (EQn(res_type,"AS",2) || EQn(res_type,"N",1)))
strcpy(type,"N");
else
if (EQn(id,"AE1",3) && (EQn(res_type,"GL",2) || EQn(res_type,"Q",1)))
strcpy(type,"O");
else
if (EQn(id,"AE2",3) && (EQn(res_type,"GL",2) || EQn(res_type,"Q",1)))
strcpy(type,"N");
else
if (EQn(id,"AD1",3) && (EQn(res_type,"HIS",3) || EQn(res_type,"H",1)))
strcpy(type,"N");
else
if (EQn(id,"AE1",3) && (EQn(res_type,"HIS",3) || EQn(res_type,"H",1)))
strcpy(type,"C");
else
if (EQn(id,"AE2",3) && (EQn(res_type,"HIS",3) || EQn(res_type,"H",1)))
strcpy(type,"N");
else
if (EQn(id,"AD2",3) && (EQn(res_type,"HIS",3) || EQn(res_type,"H",1)))
strcpy(type,"C");
}
void process_connect_records(FILE *file1, ums_type *mol)
{
char pdb_line[BUFF_SIZE];
int the_serial_num, conn1, conn2, conn3, conn4;
while ((fgets(pdb_line,sizeof(pdb_line), file1) != NULL) && NOTEQn(pdb_line,"END",3))
{
if EQn(pdb_line,"CONECT",6)
{
parse_conect_record(&pdb_line[6], &the_serial_num, &conn1, &conn2,
&conn3, &conn4);
if (conn1 > 0) add_bond(mol, the_serial_num, conn1);
if (conn2 > 0) add_bond(mol, the_serial_num, conn2);
if (conn3 > 0) add_bond(mol, the_serial_num, conn3);
if (conn4 > 0) add_bond(mol, the_serial_num, conn4);
}
}
}
void add_bond(ums_type *mol, int i, int j)
{
int k, start, end;
/*
* Need to find the atoms with the passed in serial numbers
*/
if (i < j)
{
start = i;
end = j;
}
else
{
start = j;
end = i;
}
for (k = 1; k <= Atoms; k++)
{
if (SerialNum(k) == start)
{
start = k;
break;
}
}
for (k = start + 1; k <= Atoms; k++)
{
if (SerialNum(k) == end)
{
end = k;
break;
}
}
if (ChainNum(start) == ChainNum(end))
{
Connection(start,Valence(start)) = end;
Connection(end,Valence(end)) = start;
Valence(start)++;
Valence(end)++;
Start(Bonds) = start;
End(Bonds) = end;
Bonds++;
}
}
int parse_atom_record(char *pdb_line, int the_atom, ums_type *mol)
{
/* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */
int len;
int sc;
char tmp_str[10];
char *tmp_ptr;
len = strlen(pdb_line) - 1;
sc = 0;
/* serial number */
my_strncpy(tmp_str, &pdb_line[sc], 5); sc += 6;
SerialNum(the_atom) = atoi(tmp_str);
if (sc >= len) return(FALSE);
/* atom name */
my_strncpy(tmp_str, &pdb_line[sc], 4); sc += 4;
strcpy(AtmId(the_atom), tmp_str);
tmp_ptr = rtrim(tmp_str);
strcpy(Type(the_atom), tmp_str);
if (sc >= len) return(FALSE);
/* alternate location NOTUSED */
my_strncpy(tmp_str, &pdb_line[sc], 1); sc += 1;
if (sc >= len) return(FALSE);
/* residue name */
my_strncpy(tmp_str, &pdb_line[sc], 3); sc += 4;
if ((tmp_str[0] == ' ') && (tmp_str[1] == ' ') && (tmp_str[2] == ' '))
{
strcpy(ResName(the_atom), "UNK");
}
else
{
strcpy(ResName(the_atom), tmp_str);
}
if (sc >= len) return(FALSE);
/* chain ID NOTUSED */
my_strncpy(tmp_str, &pdb_line[sc], 1); sc += 1;
if (sc >= len) return(FALSE);
/* residue sequence number */
my_strncpy(tmp_str, &pdb_line[sc], 4); sc += 4;
ResNum(the_atom) = atoi(tmp_str);
if (sc >= len) return(FALSE);
/* residue insertion code NOTUSED */
my_strncpy(tmp_str, &pdb_line[sc], 1); sc += 4;
if (sc >= len) return(FALSE);
/* X, Y, Z */
my_strncpy(tmp_str, &pdb_line[sc], 8); sc += 8;
X(the_atom) = atof(tmp_str);
if (sc >= len) return(FALSE);
my_strncpy(tmp_str, &pdb_line[sc], 8); sc += 8;
Y(the_atom) = atof(tmp_str);
if (sc >= len) return(FALSE);
my_strncpy(tmp_str, &pdb_line[sc], 8); sc += 8;
Z(the_atom) = atof(tmp_str);
if (sc >= len) return(TRUE);
#ifdef COMPLETE_PDB
/* occupancy NOTUSED */
my_strncpy(tmp_str, &pdb_line[sc], 6); sc += 6;
if (sc >= len) return(TRUE);
/* temperature factor NOTUSED */
my_strncpy(tmp_str, &pdb_line[sc], 6); sc += 7;
if (sc >= len) return(TRUE);
/* footnote number NOTUSED*/
my_strncpy(tmp_str, &pdb_line[sc], 3);
#endif
return(TRUE);
}
int
parse_conect_record(char *pdb_line, int *serial_number,
int *conn1, int *conn2, int *conn3, int *conn4)
{
/* CONECTFORMAT "(5i5)" */
int len;
int sc;
char tmp_str[10];
len = strlen(pdb_line) - 1;
sc = 0;
*serial_number = *conn1 = *conn2 = *conn3 = *conn4 = 0;
my_strncpy(tmp_str, &pdb_line[sc], 5); sc += 5;
*serial_number = atoi(tmp_str);
if (sc >= len) return TRUE;
my_strncpy(tmp_str, &pdb_line[sc], 5); sc += 5;
*conn1 = atoi(tmp_str);
if (sc >= len) return TRUE;
my_strncpy(tmp_str, &pdb_line[sc], 5); sc += 5;
*conn2 = atoi(tmp_str);
if (sc >= len) return TRUE;
my_strncpy(tmp_str, &pdb_line[sc], 5); sc += 5;
*conn3 = atoi(tmp_str);
if (sc >= len) return TRUE;
my_strncpy(tmp_str, &pdb_line[sc], 5); sc += 5;
*conn4 = atoi(tmp_str);
return TRUE;
}
int
stringToType(int record_type, int the_atom, ums_type *mol)
{
int ilen;
int atnum;
char *tmpstr1;
char *tmpstr2;
char *pos;
if ((ilen = (int)strlen ( Type(the_atom) )) == 0) {
show_warning ( "Error parsing the atom name (length of zero)." );
return (FALSE);
}
tmpstr1 = strdup ( Type(the_atom) );
tmpstr2 = strdup ( Type(the_atom) );
uppercase ( tmpstr1 );
if (record_type == PDBATOM) {
/*
* In the standard PDB files, the first character will be blank or
* a digit. The second character will be the element we are looking
* for. There are exceptions to this. Often D is used for deuterium
* so we will convert D to H. For some of the residues, the atom type
* cannot be determined and the element name will start with A.
* We will look for these and handle as appropriate.
*/
ljust ( tmpstr2 );
ljust ( tmpstr1 );
pos = tmpstr1;
while (isdigit(pos[0])) pos++;
pos[1] = '\0';
if (pos[0] == 'A') {
fix_A_type(Type(the_atom), tmpstr2, ResName(the_atom));
free ( tmpstr1 );
free ( tmpstr2 );
return (TRUE);
}
} else {
/*
* The HETATM record is more complicated than the ATOM record in terms
* of determining the type of element from the atom label.
* We will do the following:
* If there is no residue name then we assume the atom label is the
* name of the element.
* If the residue name and the atom label are the same then we will
* assume that the atom label is the name of the element.
* Generally, if there is a letter in the first column then the
* atom label represents an element with a 2 letter element name.
* The exceptions so far to this is when the residue name is
* ADR, COA, FAD, GPG, NAD, NAL or NDP. In these cases there is a
* letter in the first column and the second letter is the element
* name.
*/
pos = tmpstr1;
/*
* Check the residue name and if one of those listed above, move
* the pointer.
*/
if ((!strcmp ( ResName(the_atom), "ADR" )) ||
(!strcmp ( ResName(the_atom), "COA" )) ||
(!strcmp ( ResName(the_atom), "FAD" )) ||
(!strcmp ( ResName(the_atom), "GPG" )) ||
(!strcmp ( ResName(the_atom), "NAD" )) ||
(!strcmp ( ResName(the_atom), "NAL" )) ||
(!strcmp ( ResName(the_atom), "NDP" ))) {
pos++;
pos[1] = '\0';
} else if (isdigit(pos[0])) {
pos++;
pos[1] = '\0';
} else if (pos[0] != ' ') {
pos[2] = '\0';
if (isalpha(pos[1]) && isupper(pos[1])) pos[1] = tolower(pos[1]);
} else {
ljust ( pos );
pos[1] = '\0';
}
}
strcpy(Type(the_atom), pos);
free ( tmpstr1 );
free ( tmpstr2 );
return (TRUE);
}
#undef DELIMS
#undef PDBATOM
#undef PDBHETATM
|