Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

ReadMDLMol.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  *cr    Portions contributed and copyright (C) 1998 by Andrew Dalke and
00008  *cr    Bioreason, Inc.                                                
00009  *cr                                                                   
00010  *cr    Some information comes from the Daylight Information Systems'
00011  *cr    contrib program "mol2smi" which was placed into the public domain.
00012  *cr                                                                   
00013  ***************************************************************************/
00014 
00015 /***************************************************************************
00016  * RCS INFORMATION:
00017  *
00018  *      $RCSfile: ReadMDLMol.C,v $
00019  *      $Author: johns $        $Locker:  $             $State: Exp $
00020  *      $Revision: 1.10 $       $Date: 2003/12/31 20:14:01 $
00021  *
00022  ***************************************************************************
00023  * DESCRIPTION:
00024  *
00025  *  Read and write from the MDL ".mol" file format
00026  *
00027  ***************************************************************************/
00028 
00029 #include "ReadMDLMol.h"
00030 #include "Molecule.h"
00031 #include "Atom.h"
00032 
// Parse one right-justified decimal field of an MDL counts line.
//
//   line   - NUL-terminated text of the counts line
//   start  - zero-based column at which the field begins
//   width  - number of columns in the field (small, so no overflow)
//   value  - receives the parsed number on success
//
// The field must consist of optional leading blanks followed by digits
// that run to the last column of the field.  Returns 1 on success and 0
// when the line is too short or the field holds anything else; *value is
// untouched on failure.
static int mdl_header_field(const char *line, int start, int width, int *value)
{
  int k;

  // the line has to reach the first column of the field
  for (k = 0; k < start; k++) {
    if (line[k] == '\0' || line[k] == '\n' || line[k] == '\r') {
      return 0;
    }
  }

  const int end = start + width;
  int pos = start;
  while (pos < end && line[pos] == ' ') {
    pos++;
  }
  if (pos == end) {
    return 0;   // blank field
  }

  int v = 0;
  for (; pos < end; pos++) {
    // also rejects '\0' and '\n', so we never run past the end of the line
    if (line[pos] < '0' || line[pos] > '9') {
      return 0;
    }
    v = v * 10 + (line[pos] - '0');
  }
  *value = v;
  return 1;
}

// Read the header of an MDL ".mol" file: skip the three title lines and
// then parse the counts line.
//
//   infile - stream positioned at the start of the molecule record
//   natoms - receives the atom count
//   nbonds - receives the bond count
//
// Returns 1 on success, 0 on end-of-file, on an unparsable counts line or
// on a negative count.  On success the whole counts line has been
// consumed, so the next read from the stream starts at the first atom
// record.  *natoms and *nbonds are only written on success.
int read_mdl_header(FILE *infile, int *natoms, int *nbonds)
{
  // skip the first three lines, one character at a time
  int skipped = 0;
  while (skipped < 3) {
    int c = fgetc(infile);
    if (c == EOF) {
      return 0;
    }
    if (c == '\n') {
      skipped++;
    }
  }

  // Read the complete counts line.  The line carries more fields than the
  // two we need ("aaabbblllfffccc...V2000"); if they were left in the
  // stream they would be mistaken for the first atom record.
  char line[256];
  if (!fgets(line, sizeof(line), infile)) {
    return 0;
  }
  int saw_newline = 0;
  for (int k = 0; line[k] != '\0'; k++) {
    if (line[k] == '\n') {
      saw_newline = 1;
    }
  }
  if (!saw_newline) {
    // over-long line: throw away the remainder
    int c;
    do {
      c = fgetc(infile);
    } while (c != EOF && c != '\n');
  }

  // The counts are fixed-width, three columns each, and may abut without
  // a separating blank ("100101" is 100 atoms and 101 bonds), so prefer a
  // column-based parse.  Lines that do not follow the column layout are
  // parsed as two whitespace-separated integers, as before.
  int atoms = 0;
  int bonds = 0;
  if (!(mdl_header_field(line, 0, 3, &atoms) &&
        mdl_header_field(line, 3, 3, &bonds))) {
    if (sscanf(line, "%d %d", &atoms, &bonds) != 2) {
      return 0;
    }
  }
  if (atoms < 0 || bonds < 0) {
    return 0;
  }

  *natoms = atoms;
  *nbonds = bonds;
  return 1;
}
00056 
00057 int write_mdl_atom(FILE *outfile, const mdl_atom& atom)
00058 {
00059 
00060   int i = fprintf(outfile,
00061                   "%10.4f%10.4f%10.4f %-3s%2d%3d%3d%3d%3d%3d\n",
00062                   atom.x, atom.y, atom.z,
00063                   atom.symbol, atom.mass_difference,
00064                   atom.charge, atom.stereo_parity,
00065                   atom.hcount, atom.stereo_care_box,
00066                   atom.valence);
00067   return i > 0;
00068     
00069 }
00070 
00071 int read_mdl_atom(FILE *infile, mdl_atom *atom)
00072 {
00073   // read the line
00074   //    From the Daylight contrib code 'mol2smi'
00075   //  ATOM LOOP: Note that atom indices start at one...  If an
00076   //  atom is not interpreted, go to next CT.  Charge is decoded
00077   //  by the formula: charge = (4 - charge_code)  Note that
00078   //  atoms are added with (hcount) implicit hydrogens.
00079   //
00080   //  The format is:
00081   //  xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvv
00082   //  12345678901234567890123456789012345678901234567890123456789
00083   //          |10       |20       |30       |40       |50        
00084   //
00085   //  xyz's   - coordinates           sss    - stereo parity 
00086   //  aaa     - atom symbol           hhh    - (hcount + 1)
00087   //  dd      - mass difference       bbb    - stereo care box
00088   //  ccc     - (4 - charge)          vvv    - valence
00089   //
00090 
00091   // I forgot how to do this correctly, and it isn't very important
00092   char s[201];
00093   if (!fgets(s, 200, infile)) {
00094     return 0;
00095   }
00096   
00097   int i = sscanf(s, "%10f%10f%10f %3s%2d%3d%3d%3d%3d%3d",
00098                  &(atom->x), &(atom->y), &(atom->z),
00099                  &(atom->symbol), &(atom->mass_difference),
00100                  &(atom->charge), &(atom->stereo_parity),
00101                  &(atom->hcount), &(atom->stereo_care_box),
00102                  &(atom->valence));
00103 
00104   return i >= 8;
00105 }
00106 
00107 int read_mdl_bond(FILE *infile, mdl_bond *bond)
00108 {
00109   // Again, quoting from mol2smi
00110   // BOND LOOP: 
00111   //  
00112   //  The format is:
00113   //  111222tttsssxxxrrrccc
00114   //  12345678901234567890123456789
00115   //           |10       |20       
00116   //
00117   //  111  -  bond_from
00118   //  222  -  bond_to
00119   //  ttt  -  bond_type
00120   //  sss  -  bond_stereo
00121   // I'm not sure how the other fields are used.
00122 
00123   char s[201];
00124   if (!fgets(s, 200, infile)) {
00125     return 0;
00126   }
00127   int i = sscanf(s, "%3d%3d%3d%3d",
00128                  &(bond->bond_from), &(bond->bond_to),
00129                  &(bond->bond_type), &(bond->bond_stereo)
00130                  );
00131   return i == 4;
00132 }
00133 int write_mdl_bond(FILE *infile, const mdl_bond& bond)
00134 {
00135   int i = fprintf(infile, "%3d%3d%3d%3d\n",
00136                   bond.bond_from, bond.bond_to,
00137                   bond.bond_type, bond.bond_stereo
00138                   );
00139   return i > 0;
00140 }
00141 
// Write the record trailer to 'outfile': an empty line followed by a line
// holding two percent signs.
//
// Returns 1 when fprintf reported that characters were written, 0
// otherwise.
int write_mdl_trailer(FILE *outfile)
{
  const int nwritten = fprintf(outfile, "\n%%%%\n");
  if (nwritten > 0) {
    return 1;
  }
  return 0;
}
00147 
00148 
00149 int molecule_mdl(Molecule *mol, const char *molfile) {
00150   FILE *infile = fopen(molfile, "rt");
00151   if (!infile) {
00152     return FALSE;
00153   }
00154   int num_atoms, num_bonds;
00155 
00156   if (!read_mdl_header(infile, &num_atoms, &num_bonds)) {
00157     fclose(infile);
00158     return FALSE;
00159   }
00160   int i;
00161 
00162   mdl_atom atom;
00163   float pos[3];
00164 
00165   float newdata[ATOMEXTRA];
00166   newdata[ATOMBETA] = mol->default_beta();
00167   newdata[ATOMOCCUP] = mol->default_occup();
00168 
00169   for (i=0; i<num_atoms; i++) {
00170     if (!read_mdl_atom(infile, &atom)) {
00171       fclose(infile);
00172       return FALSE;
00173     }
00174     pos[0] = atom.x;
00175     pos[1] = atom.y;
00176     pos[2] = atom.z;
00177 
00178     newdata[ATOMCHARGE] = (float) 4.0 - atom.charge;
00179     newdata[ATOMMASS] = mol->default_mass(atom.symbol) + atom.mass_difference;
00180     newdata[ATOMRAD] = mol->default_radius(atom.symbol);
00181     mol->add_atom(atom.symbol, atom.symbol, "UNK", 0, " ", "UNK", pos,
00182              newdata);
00183   }
00184   
00185   mdl_bond bond;
00186   for (i=0; i<num_bonds; i++) {
00187     if (!read_mdl_bond(infile, &bond)) {
00188       fclose(infile);
00189       return FALSE;
00190     }
00191     int bf = bond.bond_from - 1;
00192     int bt = bond.bond_to - 1;
00193     if (bf >= 0 && bf < num_atoms &&
00194         bt >= 0 && bt < num_atoms) {
00195       mol->add_bond(bond.bond_from, bond.bond_to);
00196     }
00197   }
00198 
00199   fclose(infile);
00200   return TRUE;
00201 }
00202 
00203 
00204 
00205 

Generated on Wed Mar 22 13:15:31 2006 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002