Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

cdfplugin.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: cdfplugin.c,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.16 $       $Date: 2006/03/22 19:08:22 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * NetCDF based trajectories, used by AMBER 9, MMTK, etc.
00020  */
00021 
00022 #include <netcdf.h>
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #include <ctype.h>
00027 #include "molfile_plugin.h"
00028 
/*
 * Trajectory sub-format recognized when a file is opened.  The value is
 * stored in cdfdata.type and selects which structure/timestep reader runs.
 * An enum keeps the related constants together and visible to debuggers.
 */
enum {
  CDF_TYPE_UNKNOWN = 0,  /* no convention recognized (initial state) */
  CDF_TYPE_AMBER   = 1,  /* file follows the AMBER conventions */
  CDF_TYPE_MMTK    = 2   /* file looks like an MMTK trajectory */
};
00032 
/*
 * Everything the plugin keeps about an MMTK-style file: NetCDF dimension
 * ids with their lengths, variable ids, and text read from the file.
 */
typedef struct {
  int    step_numberdimid;          /* "step_number" dimension id */
  size_t step_numberdim;            /* length of "step_number" */
  int    minor_step_numberdimid;    /* "minor_step_number" dimension id */
  size_t minor_step_numberdim;      /* its length; 0 when the dimension is absent */
  int    atom_numberdimid;          /* "atom_number" dimension id */
  size_t atom_numberdim;            /* number of atoms */
  int    xyzdimid;                  /* "xyz" dimension id */
  size_t xyzdim;                    /* length of "xyz" */
  int    box_size_lengthdimid;      /* not assigned anywhere in this file */
  size_t box_size_lengthdim;        /* not assigned anywhere in this file */
  int    description_lengthdimid;   /* "description_length" dimension id */
  size_t description_lengthdim;     /* size of the description text */
  char  *description;               /* description text buffer (heap) */
  int    description_id;            /* "description" variable id */
  int    step_id;                   /* not assigned anywhere in this file */
  int    time_id;                   /* not assigned anywhere in this file */
  int    box_size_id;               /* "box_size" variable id, if present */
  int    configuration_id;          /* "configuration" variable id (coordinates) */
  int    has_box;                   /* nonzero when "box_size" exists */
  char  *comment;                   /* global "comment" attribute (heap), or NULL */
} mmtkdata;
00055 
/*
 * Everything the plugin keeps about an AMBER-convention file: dimension
 * ids and lengths, global attribute strings, and variable ids.
 */
typedef struct {
  int    has_box;            /* nonzero when both cell variables exist */
  int    atomdimid;          /* "atom" dimension id */
  size_t atomdim;            /* number of atoms */
  int    spatialdimid;       /* "spatial" dimension id */
  size_t spatialdim;         /* length of "spatial" */
  int    framedimid;         /* "frame" dimension id */
  size_t framedim;           /* number of frames seen when the file was opened */
  char  *conventionversion;  /* global "ConventionVersion" attribute (heap) */
  char  *title;              /* global "title" attribute (heap), or NULL */
  char  *application;        /* global "application" attribute (heap), or NULL */
  char  *program;            /* global "program" attribute (heap) */
  char  *programversion;     /* global "programVersion" attribute (heap) */
  int    spatial_id;         /* "xyz" labels variable */
  int    time_id;            /* per-frame time, picoseconds */
  int    coordinates_id;     /* atom coordinates, angstroms */
  int    cell_lengths_id;    /* periodic cell edge lengths, angstroms */
  int    cell_angles_id;     /* periodic cell angles, degrees */
  int    velocities_id;      /* atom velocities, angstroms/picosecond */
} amberdata;
00076 
00077 
00078 typedef struct {
00079   /* sub-format independent data */
00080   int ncid;
00081   int type;
00082   int natoms; 
00083   int curframe;
00084   char *conventions;
00085 
00086   /* stuff used by AMBER */
00087   amberdata amber;
00088 
00089   /* stuff used by MMTK */
00090   mmtkdata mmtk;
00091 
00092 } cdfdata;
00093 
00094 
00095 static void close_cdf_read(void *mydata) {
00096   cdfdata *cdf = (cdfdata *)mydata;
00097 
00098   nc_close(cdf->ncid);
00099 
00100   /* AMBER stuff */
00101   if (cdf->amber.title)
00102     free(cdf->amber.title);
00103 
00104   if (cdf->amber.application)
00105     free(cdf->amber.application);
00106 
00107   if (cdf->amber.program)
00108     free(cdf->amber.program);
00109 
00110   if (cdf->amber.programversion)
00111     free(cdf->amber.programversion);
00112 
00113   if (cdf->amber.conventionversion)
00114     free(cdf->amber.conventionversion);
00115 
00116   /* MMTK stuff */
00117   if (cdf->mmtk.comment)
00118     free(cdf->mmtk.comment);
00119 
00120   /* format independent stuff */
00121   if (cdf->conventions)
00122     free(cdf->conventions);
00123 
00124   free(cdf);
00125 }
00126 
00127 
00128 
00129 static int open_amber_cdf_read(cdfdata *cdf) {
00130   int rc;
00131   size_t len; 
00132   amberdata *amber = &cdf->amber;
00133 
00134   /* global attrib: "ConventionVersion" -- required */
00135   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "ConventionVersion", &len);
00136   if (rc == NC_NOERR && len > 0) {
00137     amber->conventionversion = (char *) malloc((len+1) * sizeof(char));
00138     nc_get_att_text(cdf->ncid, NC_GLOBAL, "ConventionVersion", amber->conventionversion);
00139     amber->conventionversion[len] = '\0';
00140     printf("cdfplugin) trajectory follows AMBER conventions version %s\n", amber->conventionversion);
00141   } else {
00142     return -1;
00143   }
00144 
00145   /* at this point we know that this is an AMBER trajectory */
00146   cdf->type = CDF_TYPE_AMBER;
00147 
00148   /* global attrib: "program" -- required */
00149   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "program", &len);
00150   if (rc == NC_NOERR && len > 0) {
00151     amber->program = (char *) malloc((len+1) * sizeof(char));
00152     nc_get_att_text(cdf->ncid, NC_GLOBAL, "program", amber->program);
00153     amber->program[len] = '\0';
00154     printf("cdfplugin) program %s\n", amber->program);
00155   } else {
00156     return -1;
00157   }
00158 
00159 
00160   /* global attrib: "programVersion" -- required */
00161   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "programVersion", &len);
00162   if (rc == NC_NOERR && len > 0) {
00163     amber->programversion = (char *) malloc((len+1) * sizeof(char));
00164     nc_get_att_text(cdf->ncid, NC_GLOBAL, "programVersion", amber->programversion);
00165     amber->programversion[len] = '\0';
00166     printf("cdfplugin) program version %s\n", amber->programversion);
00167   } else {
00168     return -1;
00169   }
00170 
00171 
00172   /* global attrib: "title" -- optional */
00173   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "title", &len);
00174   if (rc == NC_NOERR && len > 0) {
00175     amber->title = (char *) malloc((len+1) * sizeof(char));
00176     nc_get_att_text(cdf->ncid, NC_GLOBAL, "title", amber->title);
00177     amber->title[len] = '\0';
00178     printf("cdfplugin) title: %s\n", amber->title);
00179   } 
00180 
00181 
00182   /* global attrib: "application" -- optional */
00183   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "application", &len);
00184   if (rc == NC_NOERR && len > 0) {
00185     amber->application = (char *) malloc((len+1) * sizeof(char));
00186     nc_get_att_text(cdf->ncid, NC_GLOBAL, "application", amber->application);
00187     amber->application[len] = '\0';
00188     printf("cdfplugin) application %s\n", amber->application);
00189   } 
00190 
00191 
00192 /* XXX lots of additional error checking is needed below... */
00193 
00194   /* read in spatial dimension */
00195   rc = nc_inq_dimid(cdf->ncid, "spatial", &amber->spatialdimid);
00196   if (rc == NC_NOERR) {    
00197     rc = nc_inq_dimlen(cdf->ncid, amber->spatialdimid, &amber->spatialdim);
00198     if (rc == NC_NOERR)
00199       printf("cdfplugin) spatial dimension: %ld\n", (long)amber->spatialdim);
00200     else 
00201       return -1;
00202   } else {
00203     return -1;
00204   }
00205  
00206   /* read in atom dimension */
00207   rc = nc_inq_dimid(cdf->ncid, "atom", &amber->atomdimid);
00208   if (rc == NC_NOERR) {    
00209     rc = nc_inq_dimlen(cdf->ncid, amber->atomdimid, &amber->atomdim);
00210     if (rc == NC_NOERR) {
00211       printf("cdfplugin) atom dimension: %ld\n", (long)amber->atomdim);
00212       cdf->natoms = amber->atomdim; /* copy to format independent part */
00213     } else  {
00214       return -1;
00215     }
00216   } else {
00217     return -1;
00218   }
00219  
00220 
00221   /* read in frame dimension */
00222   rc = nc_inq_dimid(cdf->ncid, "frame", &amber->framedimid);
00223   if (rc == NC_NOERR) {    
00224     rc = nc_inq_dimlen(cdf->ncid, amber->framedimid, &amber->framedim);
00225     if (rc == NC_NOERR)
00226       printf("cdfplugin) frame dimension: %ld\n", (long)amber->framedim);
00227     else 
00228       return -1;
00229   } else {
00230     return -1;
00231   }
00232 
00233   /* 
00234    * get ID values for all of the variables we're interested in 
00235    */
00236 #if 0
00237   /* VMD can live without this, we're assuming it's always 3 */
00238   rc = nc_inq_varid(cdf->ncid, "spatial",       &amber->spatial_id);
00239   if (rc != NC_NOERR)
00240     return -1;
00241 #endif
00242 
00243   /* VMD requires coordinates at a minimum */
00244   rc = nc_inq_varid(cdf->ncid, "coordinates",   &amber->coordinates_id);
00245   if (rc != NC_NOERR)
00246     return -1;
00247 
00248 #if 0
00249   /* we don't need velocities at this time */
00250   rc = nc_inq_varid(cdf->ncid, "velocities",    &amber->velocities_id);
00251   if (rc != NC_NOERR)
00252     return -1;
00253 #endif
00254 
00255   /* optional periodic cell info */
00256   rc = nc_inq_varid(cdf->ncid, "cell_lengths",  &amber->cell_lengths_id);
00257   if (rc == NC_NOERR) {
00258     rc = nc_inq_varid(cdf->ncid, "cell_angles",   &amber->cell_angles_id);
00259     if (rc == NC_NOERR) {
00260       printf("cdfplugin) trajectory contains periodic cell information\n");
00261       amber->has_box = 1;
00262     }
00263   }
00264 
00265   return 0;
00266 }
00267 
00268 
00269 static int open_mmtk_cdf_read(cdfdata *cdf) {
00270   int rc;
00271   size_t len; 
00272   mmtkdata *mmtk = &cdf->mmtk;
00273 
00274   /* read in spatial dimension */
00275   rc = nc_inq_dimid(cdf->ncid, "xyz", &mmtk->xyzdimid);
00276   if (rc == NC_NOERR) {
00277     rc = nc_inq_dimlen(cdf->ncid, mmtk->xyzdimid, &mmtk->xyzdim);
00278     if (rc == NC_NOERR)
00279       printf("cdfplugin) xyz dimension: %ld\n", (long)mmtk->xyzdim);
00280     else 
00281       return -1;
00282   } else {
00283     return -1;
00284   }
00285 
00286 
00287   /* read in atom dimension */
00288   rc = nc_inq_dimid(cdf->ncid, "atom_number", &mmtk->atom_numberdimid); 
00289   if (rc == NC_NOERR) {
00290     rc = nc_inq_dimlen(cdf->ncid, mmtk->atom_numberdimid, &mmtk->atom_numberdim);
00291     if (rc == NC_NOERR) {
00292       printf("cdfplugin) atom_number dimension: %ld\n", (long)mmtk->atom_numberdim);
00293       cdf->natoms = mmtk->atom_numberdim; /* copy to format independent part */
00294     } else {
00295       return -1;
00296     }
00297   } else {
00298     return -1;
00299   }
00300 
00301 
00302   /* read in frame dimension */
00303   rc = nc_inq_dimid(cdf->ncid, "step_number", &mmtk->step_numberdimid);
00304   if (rc == NC_NOERR) {
00305     rc = nc_inq_dimlen(cdf->ncid, mmtk->step_numberdimid, &mmtk->step_numberdim);
00306     if (rc == NC_NOERR)
00307       printf("cdfplugin) step_number dimension: %ld\n", (long)mmtk->step_numberdim);
00308     else 
00309       return -1;
00310   } else {
00311     return -1;
00312   }
00313 
00314 
00315   /* read in minor step number dimension */
00316   rc = nc_inq_dimid(cdf->ncid, "minor_step_number", &mmtk->minor_step_numberdimid);
00317   if (rc == NC_NOERR) {
00318     rc = nc_inq_dimlen(cdf->ncid, mmtk->minor_step_numberdimid, &mmtk->minor_step_numberdim);
00319     if (rc == NC_NOERR)
00320       printf("cdfplugin) minor_step_number dimension: %ld\n", (long)mmtk->minor_step_numberdim);
00321     else 
00322       return -1;
00323   } else if (rc == NC_EBADDIM) {
00324     printf("cdfplugin) no minor_step_number dimension\n");
00325     mmtk->minor_step_numberdim = 0;
00326   } else {
00327     return -1;
00328   }
00329 
00330 
00331   /* read in description_length dimension */
00332   rc = nc_inq_dimid(cdf->ncid, "description_length", &mmtk->description_lengthdimid); 
00333   if (rc == NC_NOERR) {
00334     rc = nc_inq_dimlen(cdf->ncid, mmtk->description_lengthdimid, &mmtk->description_lengthdim);
00335     if (rc == NC_NOERR)
00336       printf("cdfplugin) description_length dimension: %ld\n", (long)mmtk->description_lengthdim);
00337     else
00338       return -1;
00339   } else {
00340     return -1;
00341   }
00342 
00343 
00344   /* get ID values for all of the variables we're interested in */
00345   rc = nc_inq_varid(cdf->ncid, "configuration", &mmtk->configuration_id);
00346   if (rc != NC_NOERR)
00347     return -1;
00348 
00349   rc = nc_inq_varid(cdf->ncid, "description", &mmtk->description_id);
00350   if (rc != NC_NOERR)
00351     return -1;
00352 
00353   /* check for PBC */
00354   rc = nc_inq_varid(cdf->ncid, "box_size", &mmtk->box_size_id);
00355   if (rc == NC_NOERR) {
00356     mmtk->has_box = 1;
00357     printf("cdfplugin) system has periodic boundary conditions\n");
00358   }
00359   else if (rc == NC_ENOTVAR)
00360     mmtk->has_box = 0;
00361   else
00362     return -1;
00363 
00364 
00365   /* global attrib: "comment" -- optional */
00366   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "comment", &len);
00367   if (rc == NC_NOERR && len > 0) {
00368     mmtk->comment = (char *) malloc((len+1) * sizeof(char));
00369     nc_get_att_text(cdf->ncid, NC_GLOBAL, "comment", mmtk->comment);
00370     mmtk->comment[len] = '\0';
00371     printf("cdfplugin) comment: %s\n", mmtk->comment);
00372   } 
00373 
00374   /* at this point we know that this is an MMTK trajectory */
00375   cdf->type = CDF_TYPE_MMTK;
00376 
00377   printf("cdfplugin) CDF file appears to be an MMTK trajectory\n");
00378 
00379   return 0;
00380 }
00381 
00382  
00383 static void *open_cdf_read(const char *filename, const char *filetype, 
00384                            int *natoms) {
00385   int ncid, rc;
00386   size_t len;
00387   cdfdata *cdf;
00388  
00389   rc = nc_open(filename, NC_NOWRITE, &ncid);
00390   if (rc != NC_NOERR) return NULL;
00391 
00392   cdf = (cdfdata *) malloc(sizeof(cdfdata));
00393   memset(cdf, 0, sizeof(cdfdata));
00394 
00395   cdf->ncid = ncid;
00396   cdf->type = CDF_TYPE_UNKNOWN;
00397 
00398 
00399   /* Determine what NetCDF conventions apply to this data, if any */
00400   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "Conventions", &len);
00401   if (rc == NC_NOERR && len > 0) {
00402     cdf->conventions = (char *) malloc((len+1) * sizeof(char));
00403     nc_get_att_text(cdf->ncid, NC_GLOBAL, "Conventions", cdf->conventions);
00404     cdf->conventions[len] = '\0';
00405     printf("cdfplugin) conventions: '%s'\n", cdf->conventions);
00406   } else {
00407     printf("cdfplugin) no conventions specified, unknown file structure\n");
00408   }
00409 
00410   /* Check if this is a file generated by AMBER */
00411   if (cdf->conventions != NULL && strstr("AMBER", cdf->conventions) != NULL) {
00412     if (!open_amber_cdf_read(cdf)) {
00413       *natoms = cdf->natoms;
00414       return cdf;
00415     }
00416   } 
00417 
00418   /* If not AMBER, then maybe it's from MMTK */
00419   if (!open_mmtk_cdf_read(cdf)) {
00420     *natoms = cdf->natoms;
00421     return cdf;
00422   } 
00423 
00424   /* if no conventions are recognized, then we free everything */
00425   /* and return failure                                        */
00426   close_cdf_read(cdf);
00427 
00428   return NULL; 
00429 }
00430 
/* Minimal bracket matcher.  The caller passes a pointer just inside an
   already-opened bracket; the scan tracks nesting of () and [] and
   returns the position one past the bracket that closes it.  If the
   string ends first, the terminating NUL is returned.  Round and square
   brackets are not told apart, so the text is assumed well formed. */
static char *find_closing_bracket(char *s) {
  int depth;

  for (depth = 1; depth > 0 && *s != '\0'; s++) {
    switch (*s) {
      case '(':
      case '[':
        depth++;
        break;
      case ')':
      case ']':
        depth--;
        break;
      default:
        break;
    }
  }

  return s;
}
00444 
/* Simple string replacement routine for fixing atom names.
 *
 * Replaces the first occurrence of substring in name with the single
 * character letter and closes the gap, editing name in place.  Nothing
 * happens when substring is empty or does not occur.
 *
 * The tail is moved with memmove() because source and destination
 * overlap, which strcpy() does not permit. */
static void atom_name_replace(char *name, char *substring, char letter) {
  size_t sublen = strlen(substring);
  char *s;

  if (sublen == 0)
    return; /* an empty pattern would match at name[0] and grow the string */

  s = strstr(name, substring);
  if (s != NULL) {
    char *tail = s + sublen;
    *s = letter;
    memmove(s + 1, tail, strlen(tail) + 1); /* +1 copies the NUL too */
  }
}
00453 
/* Delete every '_' from name, in place.
 *
 * Done as a single read/write pass so that no overlapping strcpy() is
 * needed; the write position never runs ahead of the read position. */
static void atom_name_remove_underscores(char *name) {
  char *src;
  char *dst = name;

  for (src = name; *src != '\0'; src++) {
    if (*src != '_')
      *dst++ = *src;
  }
  *dst = '\0';
}
00463 
00464 /* Set chainid, resname, and resnum for a range of atoms
00465    and fix atom names. */
00466 static void set_atom_attributes(molfile_atom_t *atoms, int natoms,
00467                                 char **atom_pointers, char chain_id,
00468                                 char *resname, int resnum,
00469                                 char *start, char *end,
00470                                 int name_correction_type) {
00471   int i;
00472   for (i=0; i<natoms; i++)
00473     if (atom_pointers[i] > start && atom_pointers[i] < end) {
00474       molfile_atom_t *atom = atoms + i;
00475       atom->chain[0] = chain_id;      
00476       atom->chain[1] = '\0';      
00477       strcpy(atom->resname, resname);
00478       atom->resid = resnum;
00479       if (name_correction_type == 1 /* proteins */) {
00480         atom_name_replace(atom->name, "_alpha", 'A');
00481         atom_name_replace(atom->name, "_beta", 'B');
00482         atom_name_replace(atom->name, "_gamma", 'G');
00483         atom_name_replace(atom->name, "_delta", 'D');
00484         atom_name_replace(atom->name, "_epsilon", 'E');
00485         atom_name_replace(atom->name, "_zeta", 'Z');
00486         atom_name_replace(atom->name, "_eta", 'H');
00487         atom_name_remove_underscores(atom->name);
00488       }
00489       else if (name_correction_type == 2 /* nucleic acids */) {
00490         if (strcmp(atom->name, "O_1") == 0)
00491           strcpy(atom->name, "O1P");
00492         else if (strcmp(atom->name, "O_2") == 0)
00493           strcpy(atom->name, "O2P");
00494         else if (strcmp(atom->name, "C_1") == 0)
00495           strcpy(atom->name, "C1'");
00496         else if (strcmp(atom->name, "C_2") == 0)
00497           strcpy(atom->name, "C2'");
00498         else if (strcmp(atom->name, "C_3") == 0)
00499           strcpy(atom->name, "C3'");
00500         else if (strcmp(atom->name, "O_3") == 0)
00501           strcpy(atom->name, "O3'");
00502         else if (strcmp(atom->name, "C_4") == 0)
00503           strcpy(atom->name, "C4'");
00504         else if (strcmp(atom->name, "O_4") == 0)
00505           strcpy(atom->name, "O4'");
00506         else if (strcmp(atom->name, "C_5") == 0)
00507           strcpy(atom->name, "C5'");
00508         else if (strcmp(atom->name, "O_5") == 0)
00509           strcpy(atom->name, "O5'");
00510       }
00511     }
00512 }
00513 
00514 /* Get structure from an MMTK trajectory file */
00515 static int read_mmtk_cdf_structure(void *mydata, int *optflags,
00516                                    molfile_atom_t *atoms) {
00517   int i, rc;
00518   molfile_atom_t *atom;
00519   cdfdata *cdf = (cdfdata *) mydata;
00520   mmtkdata *mmtk = &cdf->mmtk;
00521   size_t start[3], count[3];
00522   char *dstr;
00523   char **atom_pointers;
00524   int resnum;
00525   char resname[8];
00526 
00527   *optflags = MOLFILE_NOOPTIONS;
00528 
00529   mmtk->description = (char *) malloc((mmtk->description_lengthdim + 1) * sizeof(char));
00530   if (mmtk->description == NULL) 
00531     return MOLFILE_ERROR;
00532 
00533   start[0] = cdf->curframe; /* frame */
00534   count[0] = mmtk->description_lengthdim;
00535 
00536   rc = nc_get_vara_text(cdf->ncid, mmtk->description_id,
00537                         start, count, mmtk->description);
00538   if (rc != NC_NOERR)
00539     return MOLFILE_ERROR;
00540 
00541   /* initialize all atoms with name "X" to start with */
00542   /* indicating unknown atom types etc..              */
00543   for (i=0; i<cdf->natoms; i++) {
00544     atom = atoms + i;
00545     strncpy(atom->name, "X", sizeof(atom->name)-1);
00546     atom->name[sizeof(atom->name)] = '\0';
00547     strncpy(atom->type, atom->name, sizeof(atom->type)-1);
00548     atom->type[sizeof(atom->type)] = '\0';
00549     atom->resname[0] = '\0';
00550     atom->resid = 1;
00551     atom->chain[0] = '\0';
00552     atom->segid[0] = '\0';
00553   }
00554 
00555   /* Allocate a pointer array that will hold each atom's location in
00556      the description string. This will be used in a second pass through
00557      the description string in which residue names and indices will
00558      be assigned. */
00559   atom_pointers = (char **) malloc(cdf->natoms * sizeof(char *));
00560   if (atom_pointers == NULL)
00561     return MOLFILE_ERROR;
00562 
00563 #if 0
00564   printf("cdfplugin) MMTK description:\n%s\n", mmtk->description);
00565   printf("cdfplugin) parsing MMTK structure description...\n");
00566 #endif
00567 
00568   /* First pass: look only at atoms */
00569   dstr = mmtk->description;
00570   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00571     char *atomstr;
00572     atomstr = strstr(dstr, "A('");
00573 
00574     if (atomstr != NULL) {
00575       char name[1024];
00576       char *nmstart = NULL;
00577       char *nmend = NULL;
00578       char *indstart = NULL;
00579       char *endp = NULL;
00580       int index, len;
00581 
00582       endp = strchr(atomstr, ')');
00583       nmstart = strchr(atomstr, '\'');
00584       if (nmstart != NULL)
00585         nmend = strchr(nmstart+1, '\'');
00586       indstart = strchr(atomstr, ',');
00587       if (endp == NULL || nmstart == NULL || nmend == NULL || indstart == NULL) {
00588         printf("cdfplugin) mmtk_read_structure(): unable to parse atom tag\n");
00589         break; /* something went wrong */
00590       }
00591 
00592       len = nmend - nmstart - 1;
00593       if (len > sizeof(name)) {
00594         printf("cdfplugin) mmtk_read_structure(): bad length: %d\n", len);
00595         break; /* something went wrong */
00596       }
00597       memcpy(name, nmstart+1, len); 
00598       name[len] = '\0';
00599 
00600       index = -1;
00601       sscanf(indstart, ",%d)", &index);
00602       atom_pointers[index] = atomstr;
00603 
00604 #if 0 
00605       printf("atom: %s, %d\n", name, index);
00606 #endif
00607 
00608       if (index >= 0 && index < cdf->natoms) {
00609         atom = atoms + index;
00610         strncpy(atom->name, name, sizeof(atom->name)-1);
00611         atom->name[sizeof(atom->name)] = '\0';
00612         strncpy(atom->type, atom->name, sizeof(atom->type)-1);
00613         atom->type[sizeof(atom->type)] = '\0';
00614       }
00615 
00616       dstr = atomstr+1;
00617     } else {
00618 #if 0
00619       printf("no more atom records found\n");
00620 #endif
00621       break; /* nothing found */
00622     }
00623   }
00624 
00625   /* Second pass: peptide chains */
00626   dstr = mmtk->description;
00627   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00628     char *peptide, *pend;
00629     char *group, *gend;
00630     char *nmstart, *nmend;
00631     char chain_id = 'A';
00632     char *s;
00633 
00634     peptide = strstr(dstr, "S('");
00635     if (peptide == NULL)
00636       break;
00637     pend = find_closing_bracket(peptide+2);
00638 
00639     resnum = 1;
00640     group = peptide;
00641     while (1) {
00642       group = strstr(group, "G('");
00643       if (group == NULL || group >= pend)
00644         break;
00645       gend = find_closing_bracket(group+2);
00646       nmstart = strchr(group, '\'') + 1;
00647       nmend = strchr(nmstart, '\'');
00648       while (nmend > nmstart && isdigit(*(nmend-1)))
00649         nmend--;
00650       if (nmend-nmstart > 7)
00651         nmend = nmstart+7;
00652       strncpy(resname, nmstart, nmend-nmstart);
00653       resname[nmend-nmstart] = '\0';
00654       s = resname;
00655       while (*s) {
00656         *s = toupper(*s);
00657         s++;
00658       }
00659 #if 0
00660       printf("%s %d\n", resname, resnum);
00661 #endif
00662       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00663                           chain_id, resname, resnum, group, gend, 1);
00664       group = gend;
00665       resnum++;
00666     }
00667 
00668     if (chain_id == 'Z')
00669       chain_id = 'A';
00670     else
00671         chain_id++;
00672     dstr = pend;
00673   }
00674 
00675   /* Third pass: nucleic acid chains */
00676   dstr = mmtk->description;
00677   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00678     char *nacid, *nend;
00679     char *group, *gend;
00680     char *nmstart, *nmend;
00681     char chain_id = 'a';
00682     char *s;
00683 
00684     nacid = strstr(dstr, "N('");
00685     if (nacid == NULL)
00686       break;
00687     nend = find_closing_bracket(nacid+2);
00688 
00689     resnum = 1;
00690     group = nacid;
00691     while (1) {
00692       group = strstr(group, "G('");
00693       if (group == NULL || group >= nend)
00694         break;
00695       gend = find_closing_bracket(group+2);
00696       nmstart = strchr(group, '\'') + 1;
00697       nmend = strchr(nmstart, '\'');
00698       while (nmend > nmstart && isdigit(*(nmend-1)))
00699         nmend--;
00700       if (nmend > nmstart && nmend[-1] == '_')
00701         nmend--;
00702       if (nmend-nmstart > 7)
00703         nmend = nmstart+7;
00704       strncpy(resname, nmstart, nmend-nmstart);
00705       resname[nmend-nmstart] = '\0';
00706       s = resname;
00707       while (*s) {
00708         *s = toupper(*s);
00709         s++;
00710       }
00711       if (resname[0] == 'R' || resname[0] == 'D') {
00712         switch (resname[1]) {
00713         case 'A':
00714           strcpy(resname, "ADE");
00715           break;
00716         case 'C':
00717           strcpy(resname, "CYT");
00718           break;
00719         case 'G':
00720           strcpy(resname, "GUA");
00721           break;
00722         case 'T':
00723           strcpy(resname, "THY");
00724           break;
00725         case 'U':
00726           strcpy(resname, "URA");
00727           break;
00728         }
00729       }
00730 #if 0
00731       printf("%s %d\n", resname, resnum);
00732 #endif
00733       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00734                           chain_id, resname, resnum, group, gend, 2);
00735       group = gend;
00736       resnum++;
00737     }
00738 
00739     if (chain_id == 'z')
00740       chain_id = 'a';
00741     else
00742         chain_id++;
00743     dstr = nend;
00744   }
00745 
00746   /* Fourth pass: non-chain molecules */
00747   resnum = 1;
00748   dstr = mmtk->description;
00749   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00750     char *molecule, *mend;
00751     char *nmstart, *nmend;
00752 
00753     molecule = strstr(dstr, "M('");
00754     if (molecule == NULL)
00755       break;
00756     mend = find_closing_bracket(molecule+2);
00757     nmstart = strchr(molecule, '\'') + 1;
00758     nmend = strchr(nmstart, '\'');
00759     if (strncmp(nmstart, "water", 5) == 0)
00760       strcpy(resname, "HOH");
00761     else {
00762       if (nmend-nmstart > 7)
00763         nmend = nmstart+7;
00764       strncpy(resname, nmstart, nmend-nmstart);
00765       resname[nmend-nmstart] = '\0';
00766     }
00767 #if 0
00768     printf("%s %d\n", resname, resnum);
00769 #endif
00770     set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00771                         '_', resname, resnum, molecule, mend, 0);
00772     resnum++;
00773     dstr = mend;
00774   }
00775 
00776   free(atom_pointers);
00777 
00778   return MOLFILE_SUCCESS;
00779 }
00780 
00781 
00782 static int read_cdf_structure(void *mydata, int *optflags,
00783                                    molfile_atom_t *atoms) {
00784   cdfdata *cdf = (cdfdata *)mydata;
00785 
00786   switch (cdf->type) {
00787     case CDF_TYPE_AMBER:
00788       return MOLFILE_NOSTRUCTUREDATA; /* not an error, just no data */
00789 
00790     case CDF_TYPE_MMTK:
00791       return read_mmtk_cdf_structure(mydata, optflags, atoms);
00792   }
00793 
00794   return MOLFILE_ERROR;
00795 }
00796 
00797 
00798 static int read_amber_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
00799   cdfdata *cdf = (cdfdata *)mydata;
00800   amberdata *amber = &cdf->amber;
00801   int rc;
00802 
00803   /* Read in the atom coordinates and unit cell information */
00804   /* only save coords if we're given a valid ts pointer     */ 
00805   /* otherwise VMD wants us to skip it.                     */
00806   if (ts != NULL) {
00807     size_t start[3], count[3];
00808 
00809     start[0] = cdf->curframe; /* frame */
00810     start[1] = 0;             /* atom */
00811     start[2] = 0;             /* spatial */
00812 
00813     count[0] = 1;
00814     count[1] = amber->atomdim;
00815     count[2] = amber->spatialdim;
00816 
00817     rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id, 
00818                            start, count, ts->coords);
00819     if (rc != NC_NOERR) 
00820       return MOLFILE_ERROR;
00821 
00822     /* Read the PBC box info. */
00823     if (amber->has_box) {
00824       double lengths[3];
00825       double angles[3];
00826 
00827       start[0] = cdf->curframe; /* frame */
00828       start[1] = 0;             /* spatial */
00829 
00830       count[0] = 1;
00831       count[1] = amber->spatialdim;
00832 
00833       rc = nc_get_vara_double(cdf->ncid, amber->cell_lengths_id, 
00834                               start, count, lengths);
00835       if (rc != NC_NOERR) 
00836         return MOLFILE_ERROR;
00837 
00838       rc = nc_get_vara_double(cdf->ncid, amber->cell_angles_id, 
00839                               start, count, angles);
00840       if (rc != NC_NOERR) 
00841         return MOLFILE_ERROR;
00842 
00843       ts->A = lengths[0];
00844       ts->B = lengths[1];
00845       ts->C = lengths[2];
00846 
00847       ts->alpha = angles[0];
00848       ts->beta  = angles[1];
00849       ts->gamma = angles[2];
00850     }
00851   }
00852 
00853   cdf->curframe++;
00854   return MOLFILE_SUCCESS;
00855 }
00856 
00857 
00858 static int read_mmtk_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
00859   cdfdata *cdf = (cdfdata *)mydata;
00860   mmtkdata *mmtk = &cdf->mmtk;
00861   int rc;
00862 
00863   /* Read in the atom coordinates and unit cell information */
00864   /* only save coords if we're given a valid ts pointer     */ 
00865   /* otherwise VMD wants us to skip it.                     */
00866   if (ts != NULL) {
00867     size_t start[4], count[4];
00868     int i;
00869 
00870     if (mmtk->minor_step_numberdim == 0) {
00871       start[0] = cdf->curframe; /* step */
00872       start[1] = 0;             /* atom */
00873       start[2] = 0;             /* spatial */
00874       start[3] = 0;             /* minor step */
00875     }
00876     else {
00877       start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
00878       start[1] = 0;             /* atom */
00879       start[2] = 0;             /* spatial */
00880       start[3] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
00881     }
00882 
00883     count[0] = 1;
00884     count[1] = mmtk->atom_numberdim;
00885     count[2] = mmtk->xyzdim;
00886     count[3] = 1;             /* only want one minor step, regardless */
00887 
00888     rc = nc_get_vara_float(cdf->ncid, mmtk->configuration_id, 
00889                            start, count, ts->coords);
00890     if (rc != NC_NOERR) 
00891       return MOLFILE_ERROR;
00892 
00893     /* check for allocated but not yet used frame */
00894     if (ts->coords[0] == NC_FILL_FLOAT)
00895       return MOLFILE_ERROR;
00896 
00897     /* scale coordinates from nanometers to angstroms */
00898     for (i=0; i<(3 * mmtk->atom_numberdim); i++) {
00899       ts->coords[i] *= 10.0f;
00900     }
00901 
00902     /* Read the PBC box info. */
00903     if (mmtk->has_box) {
00904       float lengths[3];
00905 
00906       if (mmtk->minor_step_numberdim == 0) {
00907         start[0] = cdf->curframe; /* step */
00908         start[1] = 0;             /* box_size */
00909         start[2] = 0;             /* minor step */
00910       }
00911       else {
00912         start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
00913         start[1] = 0;             /* box_size */
00914         start[2] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
00915       }
00916 
00917       count[0] = 1;
00918       count[1] = 3;
00919       count[2] = 1;
00920 
00921       rc = nc_get_vara_float(cdf->ncid, mmtk->box_size_id,
00922                              start, count, lengths);
00923       if (rc != NC_NOERR) 
00924         return MOLFILE_ERROR;
00925 
00926       ts->A = 10.*lengths[0];
00927       ts->B = 10.*lengths[1];
00928       ts->C = 10.*lengths[2];
00929 
00930       ts->alpha = 90.;
00931       ts->beta  = 90.;
00932       ts->gamma = 90.;
00933     }
00934   }
00935 
00936   cdf->curframe++;
00937   return MOLFILE_SUCCESS;
00938 }
00939 
00940 
00941 
00942 static int read_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
00943   cdfdata *cdf = (cdfdata *)mydata;
00944 
00945   switch (cdf->type) {
00946     case CDF_TYPE_AMBER: 
00947       return read_amber_cdf_timestep(mydata, natoms, ts); 
00948 
00949     case CDF_TYPE_MMTK:
00950       return read_mmtk_cdf_timestep(mydata, natoms, ts); 
00951   }
00952 
00953   return MOLFILE_ERROR;
00954 }
00955 
00956 
00957 static molfile_plugin_t cdfplugin = {
00958   vmdplugin_ABIVERSION,                         /* ABI version */
00959   MOLFILE_PLUGIN_TYPE,                          /* type */
00960   "netcdf",                                     /* short name */
00961   "NetCDF (AMBER, MMTK)",                       /* pretty name */
00962   "John E. Stone",                              /* author */
00963   0,                                            /* major version */
00964   5,                                            /* minor version */
00965   VMDPLUGIN_THREADSAFE,                         /* is reentrant */
00966   "nc",                                         /* filename extensions */
00967   open_cdf_read,
00968   read_cdf_structure,
00969   0,
00970   read_cdf_timestep,
00971   close_cdf_read,
00972   0,
00973   0,
00974   0,
00975   0,
00976 };
00977 
00978 VMDPLUGIN_API int VMDPLUGIN_init(void) { return VMDPLUGIN_SUCCESS; }
00979 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
00980 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00981   (*cb)(v, (vmdplugin_t *)&cdfplugin);
00982   return VMDPLUGIN_SUCCESS;
00983 }
00984 

Generated on Wed Mar 22 13:15:28 2006 for VMD Plugins (current) by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002