Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

mol2plugin.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: mol2plugin.C,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.19 $       $Date: 2006/02/23 19:36:45 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * mol2 file reader
00020  * More information on this format is available at
00021  *   http://www.tripos.com/data/support/mol2.pdf
00022  *   http://www.tripos.com/mol2/
00023  *
00024  *   DOCK mol2 page: 
00025  *     http://www.csb.yale.edu/userguides/datamanip/dock/DOCK_4.0.1/html/Manual.41.html
00026  *
00027  * This plugin currently reads the following record types:
00028  *  MOLECULE
00029  *  ATOM
00030  *  BOND
00031  *
00032  */
00033 
00034 #include "molfile_plugin.h"
00035 
00036 #include <stdlib.h>
00037 #include <stdio.h>
00038 #include <string.h>
00039 
00040 #if defined(_AIX)
00041 #include <strings.h>
00042 #endif
00043 
00044 #define LINESIZE 256
00045 
00046 typedef struct {
00047   FILE *file;
00048   molfile_atom_t *atomlist;
00049   int natoms, nbonds, optflags, coords_read;
00050   int *from, *to;
00051   float *bondorder;
00052 } mol2data;
00053 
00054 // Open the file and create the mol2 struct used to pass data to the other
00055 // functions.
00056 static void *open_mol2_read(const char *path, const char *filetype, 
00057     int *natoms) {
00058   FILE *fd;
00059   mol2data *mol2;
00060   char line[LINESIZE]; 
00061   int match, nbonds, optflags;
00062 
00063   fd = fopen(path, "r");
00064   if (!fd)
00065     return NULL;
00066   
00067   // Find and read the MOLECULE record
00068   do {
00069     fgets(line, LINESIZE, fd);
00070     if ( ferror(fd) || feof(fd) ) {
00071       fprintf(stderr, "mol2plugin: No molecule record found in file.\n");
00072       return NULL;
00073     }
00074   } while ( strncmp(line, "@<TRIPOS>MOLECULE", 17) );
00075 
00076   fgets(line, LINESIZE, fd);  // Read and ignore the mol_name
00077   fgets(line, LINESIZE, fd);  // Read the molecule info
00078   match = sscanf(line, " %d %d", natoms, &nbonds);
00079   if (match == 1) {
00080     nbonds = 0;
00081   }
00082   else if (match != 2) {
00083     fprintf(stderr, "mol2plugin: Cannot determine the number of atoms.\n");
00084     return NULL;
00085   }
00086   fgets(line, LINESIZE, fd);  // Read and ignore the mol_type
00087   fgets(line, LINESIZE, fd);  // Read the charge_type
00088   if ( strncmp(line, "NO_CHARGES", 10) == 0 ) {
00089     optflags = MOLFILE_NOOPTIONS;
00090   }
00091   else {
00092     optflags = MOLFILE_CHARGE;
00093   }
00094 
00095   // Allocate and initialize the mol2 structure
00096   mol2 = new mol2data;
00097   mol2->file = fd;
00098   mol2->natoms = *natoms;
00099   mol2->nbonds = nbonds;
00100   mol2->optflags = optflags;
00101   mol2->coords_read = 0;
00102   mol2->from = NULL;
00103   mol2->to = NULL;
00104   mol2->bondorder = NULL;
00105 
00106   return mol2;
00107 }
00108 
00109 // Read atom information, but not coordinates.
00110 static int read_mol2(void *v, int *optflags, molfile_atom_t *atoms) {
00111   mol2data *mol2 = (mol2data *)v;
00112   char line[LINESIZE]; 
00113   int i, match;
00114   molfile_atom_t *atom;
00115 
00116   *optflags = mol2->optflags;
00117 
00118   // Find and read the ATOM record
00119   rewind(mol2->file);
00120   do {
00121     fgets(line, LINESIZE, mol2->file);
00122     if ( ferror(mol2->file) || feof(mol2->file) ) {
00123       fprintf(stderr, "mol2plugin: No atom record found in file.\n");
00124       return MOLFILE_ERROR;
00125     }
00126   } while ( strncmp(line, "@<TRIPOS>ATOM", 13) );
00127 
00128   // Read the atoms
00129   for (i = 0; i < mol2->natoms; i++) {
00130     atom = atoms+i;
00131 
00132     fgets(line, LINESIZE, mol2->file);
00133     if ( ferror(mol2->file) || feof(mol2->file) ) {
00134       fprintf(stderr, "mol2plugin: Error occurred reading atom record.\n");
00135       return MOLFILE_ERROR;
00136     }
00137 
00138     match = sscanf(line, " %*d %s %*f %*f %*f %*s %d %s %f", 
00139       atom->name, &atom->resid, atom->resname, &atom->charge);
00140 
00141     // The last three records are optional for mol2 files, supply values if
00142     // any are missing. Note that these cases are meant to fall through.
00143     switch (match) {
00144       case 0: 
00145         fprintf(stderr, "mol2plugin: Improperly formatted atom record.\n");
00146         return MOLFILE_ERROR;
00147 
00148       case 1:
00149         atom->resid = 0;
00150 
00151       case 2:
00152         sprintf(atom->resname, "%d", atom->resid);
00153 
00154       case 3:
00155         atom->charge = 0.0;
00156 
00157       default:
00158         break;
00159     }
00160 
00161     strcpy(atom->type, atom->name);
00162     // Leave these blank when not provided by the file.
00163     atom->chain[0] = '\0';
00164     atom->segid[0] = '\0';
00165   }
00166 
00167   return MOLFILE_SUCCESS;
00168 }
00169 
00170 
00171 
00172 // Create arrays of one-based bond indicies.
00173 static int read_mol2_bonds(void *v, int *nbonds, int **fromptr, int **toptr, float **bondorderptr) {
00174   mol2data *mol2 = (mol2data *)v;
00175   char line[LINESIZE], bond_type[16]; 
00176   int i, match, bond_from, bond_to, bond_index, current_nbonds;
00177   float curr_order;
00178 
00179   if (mol2->nbonds == 0) {
00180     *nbonds = 0;
00181     *fromptr = NULL;
00182     *toptr = NULL;
00183     return MOLFILE_SUCCESS;
00184   }
00185 
00186   // Allocate memory for the from and to arrays. This will be freed in
00187   // close_mol2_read
00188   mol2->from = new int[mol2->nbonds];
00189   mol2->to = new int[mol2->nbonds];
00190   mol2->bondorder=new float[mol2->nbonds];
00191   current_nbonds = mol2->nbonds;
00192 
00193   // Find and read the BOND record
00194   rewind(mol2->file);
00195   do {
00196     fgets(line, LINESIZE, mol2->file);
00197     if ( ferror(mol2->file) || feof(mol2->file) ) {
00198       fprintf(stderr, "mol2plugin: No atom record found in file.\n");
00199       return MOLFILE_ERROR;
00200     }
00201   } while ( strncmp(line, "@<TRIPOS>BOND", 13) );
00202 
00203   // Read the bonds
00204   bond_index = 0;
00205   for (i = 0; i < mol2->nbonds; i++) {
00206     fgets(line, LINESIZE, mol2->file);
00207     if ( ferror(mol2->file) || feof(mol2->file) ) {
00208       fprintf(stderr, "mol2plugin: Error occurred reading atom record.\n");
00209       return MOLFILE_ERROR;
00210     }
00211 
00212     //Move on if the next line is a header
00213     if (strncmp(line, "@", 1) == 0) {
00214       //Then the bonds are over
00215       break;
00216     }
00217 
00218     match = sscanf(line, " %*d %d %d %s", &bond_from, &bond_to, bond_type);
00219     if (match < 3) {
00220       fprintf(stderr, "mol2plugin: Improperly formatted bond record.\n");
00221       continue;
00222     }
00223     if ( strncmp(bond_type, "nc", 2) == 0 ) {
00224       // Not an actual bond, don't add it to the list
00225       current_nbonds--;
00226     }
00227     else {
00228       // Add the bond to the list
00229       curr_order=strtod(bond_type,NULL);
00230       if (curr_order<1.0 || curr_order>4.0) curr_order=1;
00231 //      fprintf(stdout,"mol2plugin: Bond from %d to %d of order %f\n", bond_from, bond_to, curr_order);
00232       mol2->from[bond_index] = bond_from;
00233       mol2->to[bond_index] = bond_to;
00234       mol2->bondorder[bond_index]=curr_order;
00235       bond_index++;
00236     }
00237   }
00238   if (bond_index > 0) {
00239     *nbonds = current_nbonds;
00240     *fromptr = mol2->from;
00241     *toptr = mol2->to;
00242     *bondorderptr = mol2->bondorder; 
00243   } else {
00244     printf("mol2plugin) WARNING: no bonds defined in mol2 file\n");
00245     *nbonds = 0;
00246     *fromptr = NULL;
00247     *toptr = NULL;
00248     *bondorderptr = NULL; 
00249   }
00250     
00251 //printf("End of read_mol2_bonds\n");
00252   return MOLFILE_SUCCESS;
00253 }
00254 
00255 
00256 // Read atom coordinates
00257 static int read_mol2_timestep(void *v, int natoms, molfile_timestep_t *ts) {
00258   mol2data *mol2 = (mol2data *)v;
00259   char line[LINESIZE];
00260   int i, match;
00261   float x, y, z;
00262 
00263   // Since the file is rewound when coordinates are read, EOF shouldn't
00264   // happen. Instead, use a flag to indicate that the single timestep has
00265   // been read
00266   if (mol2->coords_read) {
00267     return MOLFILE_EOF;
00268   }
00269 
00270   // Find and read the ATOM record
00271   rewind(mol2->file);
00272   do {
00273     fgets(line, LINESIZE, mol2->file);
00274     if ( ferror(mol2->file) || feof(mol2->file) ) {
00275       fprintf(stderr, "mol2plugin: No atom record found in file.\n");
00276       return MOLFILE_ERROR;
00277     }
00278   } while ( strncmp(line, "@<TRIPOS>ATOM", 13) );
00279 
00280   // Read the atoms
00281   for (i = 0; i < mol2->natoms; i++) {
00282     fgets(line, LINESIZE, mol2->file);
00283     if ( ferror(mol2->file) || feof(mol2->file) ) {
00284       fprintf(stderr, "mol2plugin: Error occurred reading atom coordinates.\n");
00285       return MOLFILE_ERROR;
00286     }
00287 
00288 
00289     match = sscanf(line, " %*d %*s %f %f %f", &x, &y, &z);
00290     if (match < 3) {
00291       fprintf(stderr, "mol2plugin: Improperly formatted atom coordinates.\n");
00292       return MOLFILE_ERROR;
00293     }
00294 
00295     if (ts) {
00296       ts->coords[3*i  ] = x;
00297       ts->coords[3*i+1] = y;
00298       ts->coords[3*i+2] = z;
00299     }
00300   }
00301 
00302   mol2->coords_read = 1;
00303   return MOLFILE_SUCCESS;
00304 }
00305 
00306 
00307 static void *open_mol2_write(const char *filename, const char *filetype, 
00308                            int natoms) {
00309   FILE *fd;
00310   mol2data *data;
00311 
00312   fd = fopen(filename, "w");
00313   if (!fd) { 
00314     fprintf(stderr, "Error) Unable to open mol2 file %s for writing\n",
00315             filename);
00316     return NULL;
00317   }
00318   
00319   data = (mol2data *)malloc(sizeof(mol2data));
00320   data->natoms = natoms;
00321   data->file = fd;
00322 //  data->file_name = strdup(filename);
00323   return data;
00324 }
00325 
00326 
00327 static int write_mol2_structure(void *mydata, int optflags, 
00328                                const molfile_atom_t *atoms) {
00329   mol2data *data = (mol2data *)mydata;
00330   data->atomlist = (molfile_atom_t *)malloc(data->natoms*sizeof(molfile_atom_t));
00331   memcpy(data->atomlist, atoms, data->natoms*sizeof(molfile_atom_t));
00332   return MOLFILE_SUCCESS;
00333 }
00334 
// True when type is exactly name, or is name followed by a blank (the form
// a space-padded type string would take).
static int mol2_type_is(const char *type, const char *name) {
  size_t len = strlen(name);
  return strncmp(type, name, len) == 0 &&
         (type[len] == '\0' || type[len] == ' ');
}

// True when type begins with prefix.
static int mol2_type_starts(const char *type, const char *prefix) {
  return strncmp(type, prefix, strlen(prefix)) == 0;
}

// Guess a mol2 (SYBYL-style) atom type from a force field atom type name.
//
//   outputtype - receives the four-character mol2 type followed by a
//                terminating NUL, so it must have room for 5 bytes
//   psftype    - NUL-terminated force field atom type name
//
// The element is taken from the first letter of psftype; for C, N and O a
// short list of known type names selects between the two candidate mol2
// types.  Anything unrecognized is reported as "X.  ".
void getmol2ff(char* outputtype, const char* psftype) {
  static const char *const c2_exact[]  = { "C", "CA", "CC", "CD" };
  static const char *const c2_prefix[] = { "CPH", "CPT", "CN1", "CN2",
                                           "CN3", "CN4", "CN5", "CNA" };
  static const char *const nam_prefix[] = { "NR", "NH1", "NH2", "NC2", "NY" };
  static const char *const o3_prefix[] = { "OH1", "OS", "ON4", "ON5", "ON6" };
  const char *result;
  size_t i;

  switch (psftype[0]) {
    case 'H':
      result = "H   ";
      break;

    case 'C':
      // It's a carbon... probably
      result = "C.3 ";
      for (i = 0; i < sizeof(c2_exact)/sizeof(c2_exact[0]); i++) {
        if (mol2_type_is(psftype, c2_exact[i])) result = "C.2 ";
      }
      for (i = 0; i < sizeof(c2_prefix)/sizeof(c2_prefix[0]); i++) {
        if (mol2_type_starts(psftype, c2_prefix[i])) result = "C.2 ";
      }
      break;

    case 'N':
      // It's probably nitrogen
      result = "N.3 ";
      for (i = 0; i < sizeof(nam_prefix)/sizeof(nam_prefix[0]); i++) {
        if (mol2_type_starts(psftype, nam_prefix[i])) result = "N.am";
      }
      // Every NN* type except NN6 counts as well
      if (mol2_type_starts(psftype, "NN") &&
          !mol2_type_starts(psftype, "NN6")) {
        result = "N.am";
      }
      break;

    case 'O':
      // Probably an oxygen
      result = "O.2 ";
      if (mol2_type_is(psftype, "OT")) result = "O.3 ";
      for (i = 0; i < sizeof(o3_prefix)/sizeof(o3_prefix[0]); i++) {
        if (mol2_type_starts(psftype, o3_prefix[i])) result = "O.3 ";
      }
      break;

    case 'S':
      result = "S.3 ";
      break;

    case 'P':
      result = "P.3 ";
      break;

    default:
      result = "X.  ";
      break;
  }

  // Copy the four characters and the terminating NUL, so the result can be
  // printed with %s.
  memcpy(outputtype, result, 5);
}
00379 
00380 
00381 
00382 
00383 
00384 
00385 static int write_mol2_timestep(void *mydata, const molfile_timestep_t *ts) {
00386   mol2data *data = (mol2data *)mydata; 
00387   const molfile_atom_t *atom;
00388   const float *pos;
00389   float chrgsq;
00390   int i;
00391 
00392   // try to guess whether we have charge information.
00393   chrgsq=0.0;
00394   atom = data->atomlist;
00395   for (i = 0; i < data->natoms; i++) {
00396       chrgsq += atom->charge*atom->charge;
00397       ++atom;
00398   }
00399 
00400   //print header block
00401   fprintf(data->file, "@<TRIPOS>MOLECULE\n");
00402   fprintf(data->file, "generated by VMD\n");
00403   fprintf(data->file, "%4d %4d 1 0 0\n", data->natoms, data->nbonds);
00404   fprintf(data->file, "SMALL\n");
00405   // educated guess
00406   if (chrgsq > 0.0001) {
00407       fprintf(data->file, "USER_CHARGES\n");
00408   } else {
00409       fprintf(data->file, "NO_CHARGES\n");
00410   }
00411   fprintf(data->file, "****\n");
00412   fprintf(data->file, "Energy = 0\n\n");
00413   
00414   //print atoms block
00415   fprintf(data->file, "@<TRIPOS>ATOM\n");
00416   atom = data->atomlist;
00417   pos = ts->coords;
00418   char mol2fftype[5];
00419   for (i = 0; i < data->natoms; i++) {
00420     getmol2ff(mol2fftype, atom->type);
00421     fprintf(data->file, "%7d %-4s      %8.4f  %8.4f  %8.4f %4s %4d  %3s        %8.6f\n",
00422             i+1, atom->name, pos[0], pos[1], pos[2], mol2fftype, 
00423             atom->resid, atom->resname, atom->charge);
00424     ++atom; 
00425     pos += 3;
00426   }
00427 
00428   //print bond info
00429 
00430   int j; //temp variable
00431   int k; // temp variable
00432   int l=1; //number of bond record
00433   printf("mol2plugin) numbonds: %d\n", data->nbonds);
00434   if (data->nbonds>0) fprintf(data->file, "@<TRIPOS>BOND\n");
00435   for (i=0; i<data->nbonds; i++) {
00436     //For mol2, only write bonds for fromptr[i]<toptr[i]
00437     j=data->from[i];
00438     k=data->to[i];
00439     // bondorder is either 1, 2, 3 or a textual representation: am,ar,du,un,nc
00440     // we don't have the info for the text, so we truncate to integer.
00441     fprintf(data->file, "%5d %5d %5d %2d\n", l ,data->from[i], data->to[i],
00442             (int)data->bondorder[i]);
00443     l++;
00444 //    printf("  from: %8d   to: %8d\n", fromptr[i], toptr[i]);
00445   } 
00446 
00447   // Print out substructure info to keep some programs sane
00448   fprintf(data->file,"\n@<TRIPOS>SUBSTRUCTURE\n");
00449   fprintf(data->file,"1 ****        1 TEMP                        ");
00450   fprintf(data->file,"0 ****  **** 0 ROOT\n");
00451 
00452   return MOLFILE_SUCCESS;
00453 }
00454 
00455 static int write_bonds(void *v, int nbonds, int *fromptr, int *toptr, float *bondorderptr) {
00456   mol2data *data = (mol2data *)v;
00457   data->from = new int[nbonds];
00458   data->to = new int[nbonds];
00459   data->bondorder = new float[nbonds];
00460   //set the pointers for use later
00461   for (int i=0;i<nbonds;i++) {
00462           data->from[i]=fromptr[i];
00463           data->to[i]=toptr[i];
00464           data->bondorder[i]=bondorderptr[i];
00465   }
00466   data->nbonds = nbonds;
00467   return MOLFILE_SUCCESS;
00468 }
00469 
00470 
00471 static void close_mol2_write(void *mydata) {
00472   mol2data *data = (mol2data *)mydata;
00473   if (data) {
00474     if (data->file) fclose(data->file);
00475     if (data->from != NULL) free(data->from);
00476     if (data->to != NULL)   free(data->to);
00477     if (data->bondorder != NULL)   free(data->bondorder);
00478     delete data;
00479   }
00480 }
00481 
00482 //
00483 // Free the memory used by the mol2 structure
00484 static void close_mol2_read(void *v) {
00485   mol2data *mol2 = (mol2data *)v;
00486   if (mol2) {
00487     if (mol2->file) fclose(mol2->file);
00488     if (mol2->from != NULL) free(mol2->from);
00489     if (mol2->to != NULL)   free(mol2->to);
00490     if (mol2->bondorder != NULL)   free(mol2->bondorder);
00491     delete mol2;
00492   }
00493 }
00494 
00495 
00496 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr, float **bondorderptr) {
00497   mol2data *mol2 = (mol2data *)v;
00498 
00499   /* now read bond data */
00500 //  *nbonds = start_psf_bonds(psf->fp);
00501 
00502   if (mol2->nbonds > 0) {
00503     mol2->from = (int *) malloc(*nbonds*sizeof(int));
00504     mol2->to = (int *) malloc(*nbonds*sizeof(int));
00505     mol2->bondorder = (float *) malloc(*nbonds*sizeof(float));
00506     if ((read_mol2_bonds(mol2, nbonds, &(mol2->from), &(mol2->to), &(mol2->bondorder))) != MOLFILE_SUCCESS) {
00507       fclose(mol2->file);
00508       mol2->file = NULL;
00509       return MOLFILE_ERROR;
00510     }
00511     *fromptr = mol2->from;
00512     *toptr = mol2->to;
00513     *bondorderptr = mol2->bondorder; 
00514   } else {
00515     printf("mol2plugin) WARNING: no bonds defined in mol2 file.\n");
00516     *fromptr=NULL;
00517     *toptr=NULL;
00518     *bondorderptr=NULL;
00519   }
00520   return MOLFILE_SUCCESS;
00521 }
00522 
00523 static molfile_plugin_t mol2plugin = {
00524   vmdplugin_ABIVERSION,
00525   MOLFILE_PLUGIN_TYPE,                         /* type */
00526   "mol2",                                      /* short name */
00527   "MDL mol2",                                  /* pretty name */
00528   "Peter Freddolino and Eamon Caddigan",       /* author */
00529   0,                                           /* major version */
00530   8,                                           /* minor version */
00531   VMDPLUGIN_THREADSAFE,                        /* is reentrant */
00532   "mol2",
00533   open_mol2_read,               /*open file read*/
00534   read_mol2,                    /*read structure*/
00535   read_bonds,                   /* read bond list*/
00536   read_mol2_timestep,           /* read next timestep*/
00537   close_mol2_read,              /* close_file_read */
00538   open_mol2_write,              /* open file for writing */
00539   write_mol2_structure,         /* write structure */
00540   write_mol2_timestep,          /* write tempestep*/
00541   close_mol2_write,             /*close file for writing*/
00542   0,                            /* read_volumetric_metadata */
00543   0,                            /* read_volumetric_data */
00544   0,                            /* read_rawgraphics */
00545   0,                            /*read molecule metadata */
00546   write_bonds                   /* write bonds */
00547 };
00548 
00549 VMDPLUGIN_API int VMDPLUGIN_init() {
00550   return VMDPLUGIN_SUCCESS;
00551 }
00552 
00553 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00554   (*cb)(v, (vmdplugin_t *)&mol2plugin);
00555   return VMDPLUGIN_SUCCESS;
00556 }
00557 
00558 VMDPLUGIN_API int VMDPLUGIN_fini() {
00559   return VMDPLUGIN_SUCCESS;
00560 }
00561 

Generated on Wed Mar 22 13:15:30 2006 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002