Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

mdfplugin.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: mdfplugin.C,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.11 $       $Date: 2006/02/23 19:36:45 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * Molecular data file (.mdf) reader
00020  * Insight II, Discover, etc. structure and bond information. This plugin
00021  * reads only the topology section, ignoring the optional symmetry and
00022  * atomset sections.
00023  *
00024  * Format specification can be found at:
00025  * http://instinct.v24.uthscsa.edu/~hincklab/html/soft_packs/msi_docs/insight980/formats980/File_Formats_1998.html#484257
00026  *
00027  * TODO: The current code reads the file *four* times -- once on open, once
00028  * to read the structure, and twice to read the bonds. Perhaps these could
00029  * be consolidated, e.g. by counting the bonds and populating the hash
00030  * tables during open or read_structure.
00031  *
00032  */
00033 
00034 #include "molfile_plugin.h"
00035 
00036 #define VMDPLUGIN_STATIC
00037 #include "hash.h"
00038 
00039 #include <stdlib.h>
00040 #include <stdio.h>
00041 #include <string.h>
00042 #include <ctype.h>
00043 
00044 #if defined(_AIX)
00045 #include <strings.h>
00046 #endif
00047 
00048 #define LINESIZE 256
00049 #define NAMESIZE 32
00050 
// Reader state created by open_mdf_read and passed (as void *) to the
// other plugin callbacks.
typedef struct {
  FILE *file;              // open input stream; closed by close_mdf_read
  int natoms;              // number of atom records counted at open time
  int nmols;               // number of molecule sections counted at open time
  int *from;               // bond "from" indices; NULL until read_mdf_bonds runs
  int *to;                 // bond "to" indices; NULL until read_mdf_bonds runs
  long mol_data_location;  // ftell() offset just after the first "@molecule" line
} mdfdata;
00056 
00057 // Read a line of atom data and store the values in the atom structure
00058 // Return 1 on success, 0 on error
00059 static int read_mdf_structure_line(molfile_atom_t *atom, const char *line) {
00060   // Read pertinent structure information from the line
00061   if ( sscanf(line, "%[^:]:%s %s %*s %*s %*d %*s %f %*d %*d %*d %f",
00062               atom->resname, atom->name, atom->type, 
00063               &atom->charge, &atom->occupancy) != 5 ) {
00064     return 0;
00065   }
00066 
00067   // Get the resid from the resname
00068   if ( sscanf(atom->resname, "%*[^_]_%d", &atom->resid) != 1 ) {
00069     return 0;
00070   }
00071 
00072   // Provide defaults for missing values
00073   atom->chain[0] = '\0';
00074   atom->segid[0] = '\0';
00075 
00076   return 1;
00077 }
00078 
// Extract the connectivity (bond) records from one atom record and write
// them to dest in "resname_resnumber:atom" form, each record followed by a
// single space.
//
//   dest - output buffer; must have room for at least LINESIZE bytes. It is
//          always left NUL-terminated.
//   src  - one atom record as read from the file (not modified)
//
// Records that already contain a ':' are copied as they are; all others get
// the residue prefix of the atom that owns the line. Anything from the first
// '%', '#' or '/' of a record onwards (cell, symmetry operation and bond
// order information) is dropped. A record without any connectivity entries
// produces an empty string and is still a success.
//
// Return 1 on success, 0 on error (malformed record, or converted records
// that would not fit in LINESIZE bytes)
static int get_mdf_bonds(char *dest, const char *src) {
  char resinfo[32];              // the %31 width below is sizeof - 1
  const char *seps = " \t\r\n";
  const char *p;
  size_t used = 0;
  int consumed = -1;

  dest[0] = '\0';

  // Skip the fixed columns. %n stores the offset at which the connectivity
  // list starts, and is only reached if every preceding column matched, so
  // a negative value means the record is malformed.
  if ( sscanf(src, "%31[^:]:%*s %*s %*s %*s %*d %*s %*f %*d %*d %*d %*f %*f%n",
              resinfo, &consumed) != 1 || consumed < 0 ) {
    return 0;
  }

  // Append the bonds to the destination string, converting them to the
  // correct format along the way.
  for ( p = src + consumed; ; ) {
    size_t toklen, namelen, reslen;

    p += strspn(p, seps);
    if ( *p == '\0' )
      break;
    toklen = strcspn(p, seps);

    // Remove cell/sympop/bondorder information from the bond
    for ( namelen = 0; namelen < toklen; namelen++ ) {
      if ( p[namelen] == '%' || p[namelen] == '#' || p[namelen] == '/' )
        break;
    }

    // Prepend the resname and resid to the destination atom name if it's
    // not already present (reslen counts the prefix plus its ':').
    reslen = (memchr(p, ':', toklen) == NULL) ? strlen(resinfo) + 1 : 0;

    // Room is needed for prefix, name, trailing space and terminating NUL
    if ( used + reslen + namelen + 2 > (size_t)LINESIZE )
      return 0;

    if ( reslen > 0 ) {
      memcpy(dest + used, resinfo, reslen - 1);
      used += reslen - 1;
      dest[used++] = ':';
    }
    memcpy(dest + used, p, namelen);
    used += namelen;
    dest[used++] = ' ';
    dest[used] = '\0';

    p += toklen;
  }

  return 1;
}
00117 
00118 // Return the number of bond records on a line
00119 static int count_mdf_bonds(const char *line) {
00120   char bond_records[LINESIZE];
00121   int bonds = 0;
00122   char *tmp;
00123 
00124   if ( !get_mdf_bonds(bond_records, line) ) {
00125     return -1;
00126   }
00127   
00128   for ( tmp = bond_records; (tmp = strchr(tmp, ' ')) != NULL;
00129         tmp++ ) {
00130     bonds++;
00131   }
00132 
00133   return bonds;
00134 }
00135 
00136 // Open the file and create the mdf struct used to pass data to the other
00137 // functions.
00138 static void *open_mdf_read(const char *path, const char *filetype, 
00139     int *natoms) {
00140   FILE *fd;
00141   mdfdata *mdf;
00142   long mol_data_location;
00143   char line[LINESIZE]; 
00144   int nmols = 0;
00145 
00146   fd = fopen(path, "r");
00147   if (!fd)
00148     return NULL;
00149   
00150   // Find the first molecule record
00151   do {
00152     fgets(line, LINESIZE, fd);
00153     if ( ferror(fd) || feof(fd) ) {
00154       fprintf(stderr, "mdfplugin: No molecule record found in file.\n");
00155       return NULL;
00156     }
00157   } while ( strncmp(line, "@molecule", 9) );
00158 
00159   // Remember the location of the beginning of the molecule data
00160   mol_data_location = ftell(fd);
00161 
00162   // Count the atoms in each molecule
00163   while ( line[0] != '#' ) {
00164     fgets(line, LINESIZE, fd);
00165 
00166     // Count atoms until a new molecule or the end of the section is reached
00167     while ( (line[0] != '@') && (line[0] != '#') ) {
00168       // Ignore blank and comment lines
00169       if ( !isspace(line[0]) && (line[0] != '!') )
00170         *natoms = *natoms + 1;
00171       fgets(line, LINESIZE, fd);
00172       if ( ferror(fd) || feof(fd) ) {
00173         fprintf(stderr, "mdfplugin: Error while counting atoms.\n");
00174         return NULL;
00175       }
00176     }
00177     nmols++;
00178   }
00179 
00180   // Allocate and initialize the mdf structure
00181   mdf = new mdfdata;
00182   mdf->file = fd;
00183   mdf->natoms = *natoms;
00184   mdf->nmols = nmols;
00185   mdf->from = NULL;
00186   mdf->to = NULL;
00187   mdf->mol_data_location = mol_data_location; 
00188 
00189   return mdf;
00190 }
00191 
00192 // Read the atom information for each molecule, but not bonds.
00193 // XXX - this ignores the column records, which may cause the atom records
00194 // to be read incorrectly.
00195 static int read_mdf_structure(void *v, int *optflags, molfile_atom_t *atoms) {
00196   mdfdata *mdf = (mdfdata *)v;
00197   char line[LINESIZE];
00198   int mol_num;
00199   molfile_atom_t *atom = atoms;
00200 
00201   *optflags = MOLFILE_OCCUPANCY | MOLFILE_CHARGE;
00202 
00203   // Seek to the first molecule record
00204   fseek(mdf->file, mdf->mol_data_location, SEEK_SET);
00205   line[0] = '\0';
00206 
00207   // Read the atom structure for each molecule
00208   mol_num = 0;
00209   while ( line[0] != '#' ) {
00210     fgets(line, LINESIZE, mdf->file);
00211 
00212     // Read atom structure for the current molecule
00213     while ( (line[0] != '@') && (line[0] != '#') ) {
00214       // Ignore blank and comment lines
00215       if ( !isspace(line[0]) && (line[0] != '!') ) {
00216         if ( !read_mdf_structure_line(atom, line) ) {
00217           fprintf(stderr, "mdfplugin: Improperly formatted atom record encountered while reading structure.\n");
00218           return MOLFILE_ERROR;
00219         }
00220 
00221         // XXX - use the chain name to identify different molecules
00222         sprintf(atom->chain, "%d", mol_num);
00223 
00224         atom++;
00225       }
00226 
00227       fgets(line, LINESIZE, mdf->file);
00228       if ( ferror(mdf->file) || feof(mdf->file) ) {
00229         fprintf(stderr, "mdfplugin: File error while reading structure.\n");
00230         return MOLFILE_ERROR;
00231       }
00232     }
00233     mol_num++;
00234   }
00235 
00236   return MOLFILE_SUCCESS;
00237 }
00238 
00239 // Create arrays of one-based bond indicies.
00240 static int read_mdf_bonds(void *v, int *nbonds, int **from_data, int **to_data, float **bondorderptr) {
00241   mdfdata *mdf = (mdfdata *)v;
00242   int mol, atom, bond_count, *fromptr, *toptr, tmp_to;
00243   char *curr, *next, line[LINESIZE], bond_records[LINESIZE];
00244   char (*atomnames)[NAMESIZE]; // Dynamic array of cstrings
00245   hash_t *hasharray;           // Array of hash tables
00246 
00247   // Allocate and initialize the hash table for each molecule.
00248   hasharray = new hash_t[mdf->nmols];
00249   for (mol = 0; mol < mdf->nmols; mol++) {
00250     hash_init(&hasharray[mol], 256);
00251   }
00252   atomnames = new char[mdf->natoms][NAMESIZE];
00253 
00254   // Populate the hash table; key: atom name; value: one-based atom index.
00255   // Count the bonds, each bond is counted twice.
00256   fseek(mdf->file, mdf->mol_data_location, SEEK_SET);
00257   line[0] = '\0';
00258   atom = 1;
00259   mol = 0;
00260   bond_count = 0;
00261   while ( line[0] != '#' ) {
00262     fgets(line, LINESIZE, mdf->file);
00263 
00264     // Read the atom names
00265     while ( (line[0] != '@') && (line[0] != '#') ) {
00266       // Ignore blank and comment lines
00267       if ( !isspace(line[0]) && (line[0] != '!') ) {
00268         if ( sscanf(line, "%s %*s", atomnames[atom-1]) != 1 ) {
00269           fprintf(stderr, "mdfplugin: Improperly formatted atom record encountered while reading bonds.\n");
00270           return MOLFILE_ERROR;
00271         }
00272         if ( hash_insert(&hasharray[mol], atomnames[atom-1], atom) != HASH_FAIL ) {
00273           fprintf(stderr, "mdfplugin: Could not add atom to hash table.\n");
00274           return MOLFILE_ERROR;
00275         }
00276 
00277         bond_count += count_mdf_bonds(line);
00278         atom++;
00279       }
00280 
00281       fgets(line, LINESIZE, mdf->file);
00282       if ( ferror(mdf->file) || feof(mdf->file) ) {
00283         fprintf(stderr, "mdfplugin: File error while reading bonds.\n");
00284         return MOLFILE_ERROR;
00285       }
00286     }
00287 
00288     mol++;
00289   }
00290 
00291   bond_count /= 2;
00292   mdf->from = new int[bond_count];
00293   mdf->to = new int[bond_count];
00294   fromptr = mdf->from;
00295   toptr = mdf->to;
00296 
00297   // Read the molecules, storing the bond-indicies in fromptr and toprt
00298   fseek(mdf->file, mdf->mol_data_location, SEEK_SET);
00299   line[0] = '\0';
00300   atom = 1;
00301   mol = 0;
00302   while ( line[0] != '#' ) {
00303     fgets(line, LINESIZE, mdf->file);
00304 
00305     // Read the bonds
00306     while ( (line[0] != '@') && (line[0] != '#') ) {
00307       // Ignore blank and comment lines
00308       if ( !isspace(line[0]) && (line[0] != '!') ) {
00309         if ( !get_mdf_bonds(bond_records, line) ) {
00310           fprintf(stderr, "mdfplugin: Error reading bonds from atom data.\n");
00311           return MOLFILE_ERROR;
00312         }
00313 
00314         // Read each bond in the line
00315         for ( curr = bond_records; (next = strchr(curr, ' ')) != NULL; 
00316               curr = next+1 ) {
00317           *next = '\0';
00318           tmp_to = hash_lookup(&hasharray[mol], curr);
00319           if (tmp_to == HASH_FAIL) {
00320             fprintf(stderr, "mdfplugin: Could not find atom in hash table.\n");
00321             return MOLFILE_ERROR;
00322           }
00323           else if (tmp_to > atom) {
00324             // Only count bonds to atoms greater than the current one, since
00325             // each bond is listed twice
00326             *fromptr = atom;
00327             *toptr = tmp_to;
00328             fromptr++;
00329             toptr++;
00330           }
00331         }
00332 
00333         atom++;
00334       }
00335 
00336       fgets(line, LINESIZE, mdf->file);
00337       if ( ferror(mdf->file) || feof(mdf->file) ) {
00338         fprintf(stderr, "mdfplugin: File error while reading bonds.\n");
00339         return MOLFILE_ERROR;
00340       }
00341     }
00342 
00343     mol++;
00344   }
00345 
00346   for (mol = 0; mol < mdf->nmols; mol++) {
00347     hash_destroy(&hasharray[mol]);
00348   }
00349   delete [] hasharray;
00350   delete [] atomnames;
00351 
00352   *nbonds = bond_count;
00353   *from_data = mdf->from;
00354   *to_data = mdf->to;
00355   *bondorderptr = NULL; // not implemented yet
00356 
00357   return MOLFILE_SUCCESS;
00358 }
00359 
00360 // Free the memory used by the mdf structure
00361 static void close_mdf_read(void *v) {
00362   mdfdata *mdf = (mdfdata *)v;
00363   if (mdf) {
00364     if (mdf->file) fclose(mdf->file);
00365     if (mdf->from) delete [] mdf->from;
00366     if (mdf->to)   delete [] mdf->to;
00367     delete mdf;
00368   }
00369 }
00370 
00371 // Plugin Initialization
00372 static molfile_plugin_t plugin = {
00373   vmdplugin_ABIVERSION,   /* ABI version */
00374   MOLFILE_PLUGIN_TYPE,    /* type */
00375   "mdf",                  /* short name */
00376   "InsightII MDF",        /* pretty name */
00377   "Eamon Caddigan",       /* author */
00378   0,                      /* major version */
00379   2,                      /* minor version */
00380   VMDPLUGIN_THREADSAFE,   /* is_reentrant */
00381   "mdf",                  /* filename extension */
00382 };
00383 
00384 VMDPLUGIN_API int VMDPLUGIN_init(void) { return VMDPLUGIN_SUCCESS; }
00385 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
00386 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00387   plugin.open_file_read = open_mdf_read;
00388   plugin.read_structure = read_mdf_structure;
00389   plugin.read_bonds = read_mdf_bonds;
00390   plugin.close_file_read = close_mdf_read;
00391   (*cb)(v, (vmdplugin_t *)&plugin);
00392   return VMDPLUGIN_SUCCESS;
00393 }
00394 

Generated on Wed Mar 22 13:15:30 2006 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002