Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

webpdbplugin.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: webpdbplugin.c,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.44 $       $Date: 2006/03/01 19:55:25 $
00015  *
00016  ***************************************************************************/
00017 
#include <tcl.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "molfile_plugin.h"
#include "readpdb.h"
#include "periodic_table.h"
00025 
00026 /*
00027  * Load pdb from the RCSB
00028  * Uses Tcl
00029  */
00030 
00031 /*
00032  * Need my own read_pdb_record because the one in readpdb takes a FILE*.
00033  * This one will be better anyway since I don't recopy the string ;-)
00034  * Read the given pdb string.  On returning, pos will point to the start of
00035  * the next read. 
00036  */ 
00037 static int my_read_pdb_record(const char *pdb, char **pos) {
00038   int recType = PDB_UNKNOWN;
00039   char *nlpos;  /* newline position */
00040 
00041   nlpos = strchr(pdb, '\n'); /* XXX segv occurs on x86_64 linux */
00042                              /* loading '1epw' or '1sft'        */ 
00043 
00044   if (!nlpos) {
00045     return PDB_EOF;
00046   } 
00047 
00048   /* set the next position to the first char after the newline */
00049   *pos = nlpos + 1;
00050 
00051   /* atom records are the most common */
00052   if (!strncmp(pdb, "ATOM ",  5) || !strncmp(pdb, "HETATM", 6)) {
00053     /* Note that by only comparing 5 chars for "ATOM " rather than 6,     */
00054     /* we allow PDB files containing > 99,999 atoms generated by AMBER    */
00055     /* to load which would otherwise fail.  Not needed for HETATM since   */
00056     /* those aren't going to show up in files produced for/by MD engines. */
00057     recType = PDB_ATOM;
00058   } else if (!strncmp(pdb, "REMARK", 6)) {
00059     recType = PDB_REMARK;
00060   } else if (!strncmp(pdb, "CRYST1", 6)) {
00061     recType = PDB_CRYST1;
00062   } else if (!strncmp(pdb, "HEADER", 6)) {
00063     recType = PDB_HEADER;
00064   } else if (!strncmp(pdb, "END", 3)) {  /* very permissive */
00065     /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
00066     /*     since we don't remove trailing '\n' chars                   */
00067 
00068     /* the only two legal END records are "END   " and "ENDMDL" */
00069     recType = PDB_END;
00070   } 
00071 
00072   return recType;
00073 }
00074 
00075  
00076 typedef struct {
00077   char *pdbstr; 
00078   char *pos;
00079   int natoms;
00080   molfile_metadata_t *meta;
00081   int nconect;
00082   int nbonds, maxbnum;
00083   int *from, *to, *idxmap;
00084 } pdbdata;
00085 
00086 
00087 static void *pdb_read(char *pdbstr, int *natoms) {
00088   pdbdata *pdb;
00089   int indx, nconect;
00090   char *pos = pdbstr;
00091   char *next;
00092 
00093   if (!pdbstr) return NULL;
00094 
00095   pdb = (pdbdata *)malloc(sizeof(pdbdata));
00096   pdb->meta = (molfile_metadata_t *) malloc(sizeof(molfile_metadata_t));
00097   memset(pdb->meta, 0, sizeof(molfile_metadata_t));
00098 
00099   pdb->meta->remarklen = 0;
00100   pdb->meta->remarks = NULL;
00101 
00102   *natoms=0;
00103   nconect=0;
00104   do {
00105     indx = my_read_pdb_record(pos, &next);
00106     if (indx == PDB_ATOM) {
00107       *natoms += 1;
00108     } else if (indx == PDB_CONECT) {
00109       nconect++;
00110     } else if (indx == PDB_HEADER) {
00111       get_pdb_header(pos, pdb->meta->accession, pdb->meta->date, NULL);
00112       if (strlen(pdb->meta->accession) > 0)
00113         strcpy(pdb->meta->database, "PDB");
00114     } else if (indx == PDB_REMARK || indx == PDB_UNKNOWN) {
00115       int len = next - pos;
00116       int newlen = len + pdb->meta->remarklen;
00117 
00118       char *newstr=realloc(pdb->meta->remarks, newlen + 1);
00119       if (newstr != NULL) {
00120         pdb->meta->remarks = newstr;
00121         pdb->meta->remarks[pdb->meta->remarklen] = '\0';
00122         memcpy(pdb->meta->remarks + pdb->meta->remarklen, pos, len);
00123         pdb->meta->remarks[newlen] = '\0';
00124         pdb->meta->remarklen = newlen;
00125       }
00126     }
00127 
00128     pos = next;
00129   } while (indx != PDB_END && indx != PDB_EOF);
00130 
00131   pdb->pdbstr = pdbstr;
00132   pdb->pos =    pdbstr;
00133 
00134   pdb->natoms = *natoms;
00135   pdb->nconect = nconect;
00136   pdb->nbonds = 0;
00137   pdb->maxbnum = 0;
00138   pdb->from = NULL;
00139   pdb->to = NULL;
00140   pdb->idxmap = NULL;
00141 
00142 #if defined(VMDUSECONECTRECORDS)
00143   /* allocate atom index translation table if we have 99,999 atoms or less */
00144   /* and we have conect records to process                                 */
00145   if (pdb->natoms < 100000 && pdb->nconect > 0) {
00146     pdb->idxmap = (int *) malloc(100000 * sizeof(int));
00147     memset(pdb->idxmap, 0, 100000 * sizeof(int));
00148   }
00149 #endif
00150 
00151   return pdb;
00152 }
00153 
/* RCSB usage advisory; each element is printed on its own line. */
static const char *rcsbmsg[] = {
  "  The PDB is supported by RCSB, the NSF, US PHS, NIH, NCRP, NIGMS, NLM,",
  "and US DoE, who are not liable for the data.  PDB files shall not be",
  "sold.  See ftp://ftp.rcsb.org/advisory.doc for full details."
};

/* Nonzero until the advisory has been printed; open_file_read() clears it. */
static int show_msg = 1;
00161 
00162 static void *open_file_read(const char *filename, const char *filetype,
00163     int *natoms) {
00164 
00165   Tcl_Interp *interp;
00166   char url[300];
00167   char cmd[300]; 
00168   char *pdbfile;
00169   const char *result;
00170   void *v;
00171 
00172   /*
00173    * Create and initialize the interpreter
00174    */
00175   interp = Tcl_CreateInterp();
00176   if (!interp) {
00177     fprintf(stderr, "Could not create new Tcl Interp\n");
00178     return NULL; 
00179   }
00180   if (Tcl_Init(interp) != TCL_OK) {
00181     fprintf(stderr, "Warning, could not create initialize Tcl Interp\n");
00182   }
00183   if (!Tcl_PkgRequire(interp, (char *)"http", (char *)"2.0", 0)) {
00184     fprintf(stderr, "Could not load http package\n");
00185     Tcl_DeleteInterp(interp);
00186     return NULL;
00187   }
00188 
00189   if (strlen(filename) != 4) {
00190     fprintf(stderr, "PDB code %s is invalid; PDB accession codes have four letters.\n", filename);
00191     Tcl_DeleteInterp(interp);
00192     return NULL;
00193   }
00194 
00195   if (show_msg) {
00196     int i;
00197     show_msg = 0;
00198     for (i=0; i<3; i++) printf("%s\n", rcsbmsg[i]);
00199   }
00200 
00201   // Adapted to new PDB website layout, changed on 1/1/2006 
00202   sprintf(url, "http://www.rcsb.org/pdb/downloadFile.do?fileFormat=pdb&compression=NO&structureId=%s",filename);
00203   sprintf(cmd, "set token [::http::geturl \"%s\"]", url);
00204   if (Tcl_Eval(interp, cmd) != TCL_OK) {
00205     fprintf(stderr, "Error loading PDB: %s\n",interp->result);
00206     Tcl_DeleteInterp(interp);
00207     return NULL;
00208   } 
00209   sprintf(cmd, "upvar #0 $token state");
00210   Tcl_Eval(interp, cmd); 
00211   
00212   result = Tcl_GetVar2(interp, (char *)"state", "body", TCL_GLOBAL_ONLY); 
00213   if (!result) {
00214     fprintf(stderr, "Error loading PDB: %s\n", interp->result);
00215     Tcl_DeleteInterp(interp);
00216     return NULL;
00217   } 
00218   pdbfile = strdup(result);
00219   Tcl_DeleteInterp(interp);
00220 
00221   /* XXX this code needs updating still */
00222   /* pdbfile will be free'd by close_pdb() */
00223   v = pdb_read(pdbfile, natoms); 
00224   return v;
00225 }
00226    
00227 static int read_pdb_structure(void *mydata, int *optflags, 
00228     molfile_atom_t *atoms) {
00229 
00230   pdbdata *pdb = (pdbdata *)mydata;
00231   char *pos = pdb->pdbstr;
00232   char *next;
00233   int i, rectype, atomserial, pteidx;
00234   char ridstr[8];
00235   char elementsymbol[3];
00236   molfile_atom_t *atom;
00237   int badptecount = 0;
00238   elementsymbol[2]=0;
00239 
00240   *optflags = MOLFILE_INSERTION | MOLFILE_OCCUPANCY | MOLFILE_BFACTOR | 
00241               MOLFILE_ALTLOC | MOLFILE_ATOMICNUMBER | MOLFILE_BONDSSPECIAL;
00242 
00243   i=0; /* Count atoms */
00244   do {
00245     rectype = my_read_pdb_record(pos, &next);
00246     switch (rectype) {
00247     case PDB_ATOM:
00248       atom = atoms+i;
00249       get_pdb_fields(pos, next-pos, &atomserial,
00250           atom->name, atom->resname, atom->chain, atom->segid, 
00251           ridstr, atom->insertion, atom->altloc, elementsymbol,
00252           NULL, NULL, NULL, &atom->occupancy, &atom->bfactor);
00253 
00254       if (pdb->idxmap != NULL && atomserial < 100000) {
00255         pdb->idxmap[atomserial] = i; /* record new serial number translation */
00256       }
00257 
00258       atom->resid = atoi(ridstr);
00259 
00260       /* determine atomic number from the element symbol */
00261       pteidx = get_pte_idx_from_string(elementsymbol);
00262       atom->atomicnumber = pteidx;
00263       if (pteidx != 0) {
00264         atom->mass = get_pte_mass(pteidx);
00265         atom->radius = get_pte_vdw_radius(pteidx);
00266       } else {
00267         badptecount++; /* unrecognized element */
00268       }
00269 
00270       strcpy(atom->type, atom->name);
00271       i++;
00272       break;
00273 
00274     case PDB_CONECT:
00275       /* only read CONECT records for structures where we know they can */
00276       /* be valid for all of the atoms in the structure                 */
00277       if (pdb->idxmap != NULL) {
00278         char cbuf[PDB_BUFFER_LENGTH];
00279         int len = next-pos;
00280 
00281         if (len > PDB_BUFFER_LENGTH) 
00282           len = PDB_BUFFER_LENGTH;
00283         strncpy(cbuf, pos, len);
00284         get_pdb_conect(cbuf, pdb->natoms, pdb->idxmap,
00285                        &pdb->maxbnum, &pdb->nbonds, &pdb->from, &pdb->to);
00286       }
00287       break;
00288 
00289     default:
00290       /* other record types are ignored in the structure callback */
00291       /* and are dealt with in the timestep callback or elsewhere */
00292       break;
00293     }
00294     pos = next;
00295   } while (rectype != PDB_END && rectype != PDB_EOF);
00296 
00297   /* if all atoms are recognized, set the mass and radius flags too,  */
00298   /* otherwise let VMD guess these for itself using it's own methods  */
00299   if (badptecount == 0) {
00300     *optflags |= MOLFILE_MASS | MOLFILE_RADIUS;
00301   }
00302 
00303   return MOLFILE_SUCCESS;
00304 }
00305 
00306 
00307 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr, float **bondorder) {
00308   pdbdata *pdb = (pdbdata *)v;
00309  
00310   *nbonds = 0;
00311   *fromptr = NULL;
00312   *toptr = NULL;
00313   *bondorder = NULL; /* PDB files don't have bond order information */
00314 
00315 // The newest plugin API allows us to return CONECT records as
00316 // additional bonds above and beyond what the distance search returns.
00317 // Without that feature, we otherwise have to check completeness and
00318 // ignore them if they don't look to be fully specified for this molecule
00319 #if !defined(MOLFILE_BONDSSPECIAL)
00320   if (pdb->natoms >= 100000) {
00321     printf("webpdbplugin) Warning: more than 99,999 atoms, ignored CONECT records\n");
00322     return MOLFILE_SUCCESS;
00323   } else if (((float) pdb->nconect / (float) pdb->natoms) <= 0.85) {
00324     printf("webpdbplugin) Warning: Probable incomplete bond structure specified,\n");
00325     printf("webpdbplugin)          ignoring CONECT records\n");
00326     return MOLFILE_SUCCESS;
00327   } else if (pdb->nconect == 0) {
00328     return MOLFILE_SUCCESS;
00329   }
00330 #endif
00331 
00332   *nbonds = pdb->nbonds;
00333   *fromptr = pdb->from;
00334   *toptr = pdb->to;
00335 
00336   return MOLFILE_SUCCESS;
00337 }
00338 
00339 
00340 static int read_next_timestep(void *v, int natoms, molfile_timestep_t *ts) {
00341   pdbdata *pdb = (pdbdata *)v;
00342   char *pos = pdb->pos;
00343   char *next;
00344   float *x, *y, *z;
00345   float occup, bfac;
00346   int indx, i = 0;
00347 
00348   if (ts) {
00349     x = ts->coords;
00350     y = x+1;
00351     z = x+2;
00352   } else {
00353     x = y = z = 0;
00354   }
00355   do {
00356     indx = my_read_pdb_record(pos, &next);
00357     if((indx == PDB_END || indx == PDB_EOF) && (i < pdb->natoms)) {
00358       return MOLFILE_ERROR;
00359     } else if(indx == PDB_ATOM) {
00360       if(i++ >= pdb->natoms) {
00361         break;
00362       }
00363       /* just get the coordinates, and store them */
00364       if (ts) {
00365         get_pdb_coordinates(pos, x, y, z, &occup, &bfac);
00366         x += 3;
00367         y += 3;
00368         z += 3;
00369       }
00370     } else if (indx == PDB_CRYST1) {
00371       if (ts) {
00372         get_pdb_cryst1(pos, &ts->alpha, &ts->beta, &ts->gamma,
00373                                &ts->A, &ts->B, &ts->C);
00374       }
00375     }
00376     pos = next;
00377   } while(!(indx == PDB_END || indx == PDB_EOF));
00378   pdb->pos = pos;
00379 
00380   return MOLFILE_SUCCESS;
00381 }
00382 
00383 static void close_pdb_read(void *v) {
00384   pdbdata *pdb = (pdbdata *)v;
00385   if (!pdb) return;
00386   free(pdb->pdbstr);
00387   if (pdb->idxmap != NULL)
00388     free(pdb->idxmap);
00389   if (pdb->meta->remarks != NULL)
00390     free(pdb->meta->remarks);
00391   if (pdb->meta != NULL)
00392     free(pdb->meta);
00393   free(pdb);
00394 }
00395 
00396 
00397 static int read_molecule_metadata(void *v, molfile_metadata_t **metadata) {
00398   pdbdata *pdb = (pdbdata *)v;
00399   *metadata = pdb->meta;
00400   return MOLFILE_SUCCESS;
00401 }
00402 
00403 /* 
00404  * Registration stuff
00405  */
00406 
00407 static molfile_plugin_t plugin = {
00408   vmdplugin_ABIVERSION,             /* ABI version */
00409   MOLFILE_PLUGIN_TYPE,              /* type */
00410   "webpdb",                         /* name */
00411   "Web PDB Download",               /* name */
00412   "Justin Gullingsrud, John Stone", /* author */
00413   1,                                /* major version */
00414   10,                               /* minor version */
00415   VMDPLUGIN_THREADSAFE,             /* is reentrant */
00416   "",                               /* filename extension */
00417   open_file_read,
00418   read_pdb_structure,
00419   read_bonds,
00420   read_next_timestep,
00421   close_pdb_read,
00422   0,
00423   0,
00424   0,
00425   0,
00426   0,
00427   0,
00428   0,
00429   read_molecule_metadata
00430 };
00431 
00432 VMDPLUGIN_API int VMDPLUGIN_init() {
00433   return VMDPLUGIN_SUCCESS;
00434 }
00435 
00436 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00437   (*cb)(v, (vmdplugin_t *)&plugin);
00438   return VMDPLUGIN_SUCCESS;
00439 }
00440 
00441 VMDPLUGIN_API int VMDPLUGIN_fini() {
00442   return VMDPLUGIN_SUCCESS;
00443 }
00444 
00445 
00446 #ifdef TEST_WEBPDB_PLUGIN
00447 
00448 int main(int argc, char *argv[]) {
00449   char *file;
00450   if (argc < 2) {
00451     fprintf(stderr, "Usage: %s <pdbcode>\n", argv[0]);
00452     return -1;
00453   }
00454   file = (char *)open_file_read(argv[1], "webpdb",  NULL);
00455   printf("%s\n", file);
00456   free(file);
00457   return 0;
00458 }
00459 
00460 #endif

Generated on Wed Mar 22 13:15:31 2006 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002