/********************************************************************** obfit = Fit molecules according to a SMART pattern Copyright (C) 2003 Fabien Fontaine Some portions Copyright (C) 2004-2005 Geoffrey R. Hutchison This file is part of the Open Babel project. For more information, see This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ***********************************************************************/ /* Require a fixed molecule, a set of molecules to move, a SMART pattern to match the fixed and the moving molecules If the SMART is not found in the fixed molecule the program exits If the SMART is not found in a moving molecule, the molecule is not moved example of command line: obfit "[nh]1c2c(=O)n(C)c(=O)n(C)c2cc1" testref.sdf testmv.sdf */ // used to set import/export for Cygwin DLLs #ifdef WIN32 #define USING_OBDLL #endif #include #include #include #include #include using namespace std; using namespace OpenBabel; //find the center of mass of a list of atoms vector3 mass_c( vector &aindex, OBMol &mol); /////////////////////////////////////////////////////////////////////////////// //! \brief superimpose a set of molecules on the atoms of a reference molecule //! The atoms used for the overlay are defined by the SMART pattern int main(int argc,char **argv) { int errflg=0; char *FileRef=NULL, *FileMove=NULL, *Pattern=NULL; string err; char *program_name=argv[0]; // parse the command line if (argc!=4) { errflg++; } else { FileRef = argv[2]; FileMove = argv[3]; Pattern = argv[1]; } if (errflg) { err = "Usage: "; err += program_name; err += " \"PATTERN\" \n"; ThrowError(err); exit(-1); } // create the pattern OBSmartsPattern sp; if (!sp.Init(Pattern)) { err = program_name; err += ": Unable to read the SMART: "; err += Pattern; ThrowError(err); exit(-1); } // Find Input filetypes OBConversion conv; OBFormat *refFormat = conv.FormatFromExt(FileRef); if (!refFormat || !conv.SetInAndOutFormats(refFormat,refFormat)) { cerr << program_name << ": cannot read fixed molecule format!" << endl; exit (-1); } OBFormat *moveFormat = conv.FormatFromExt(FileMove); if (!moveFormat || !conv.SetInAndOutFormats(moveFormat,moveFormat)) { cerr << program_name << ": cannot read moving molecule(s) format!" << endl; exit (-1); } conv.SetInAndOutFormats(refFormat,moveFormat); ifstream ifsref; OBMol molref; vector< vector > maplist; // list of matched atoms vector< vector >::iterator i; // and its iterators vector< int >::iterator j; vector refatoms; //Read the reference structure ifsref.open(FileRef); if (!ifsref) { cerr << program_name << ": cannot read fixed molecule file: " << FileRef << endl; exit (-1); } molref.Clear(); conv.Read(&molref,&ifsref); // and check if the SMART match sp.Match(molref); maplist = sp.GetUMapList(); // get unique matches if (maplist.empty()) { err = program_name; err += ": Unable to map SMART: "; err += Pattern; err += " in reference molecule: "; err += FileRef; ThrowError(err); exit(-1); } // Find the matching atoms for (i = maplist.begin(); i != maplist.end(); ++i) { refatoms.clear(); // Save only the last set of atoms for(j= (*i).begin(); j != (*i).end(); ++j) { refatoms.push_back(*j); } } // set the translation vector vector3 tvref(0,0,0); OBAtom *atom; unsigned int c; tvref = mass_c(refatoms, molref); // center the molecule molref.Translate(-tvref); //get the coordinates of the SMART atoms double *refcoor = new double[refatoms.size()*3]; for(c=0; cx(); refcoor[c*3+1] = atom->y(); refcoor[c*3+2] = atom->z(); } conv.SetInAndOutFormats(moveFormat,moveFormat); ifstream ifsmv; OBMol molmv; vector mvatoms; vector3 tvmv; unsigned int size=0; double rmatrix[3][3]; //Read the moving structures ifsmv.open(FileMove); if (!ifsmv) { cerr << program_name << ": cannot read file: " << FileMove << endl; exit (-1); } for (;;) { molmv.Clear(); conv.Read(&molmv,&ifsmv); // Read molecule if (molmv.Empty()) break; if (sp.Match(molmv)) // if match perform rotation { maplist = sp.GetMapList(); // get all matches // Find the matching atoms // Looping over all matches to find best match double rmsd; double best_rmsd = 999.999; vector best_mvatoms; for (i = maplist.begin(); i != maplist.end(); ++i) { mvatoms.clear(); // Save only the last set of atoms for(j= (*i).begin(); j != (*i).end(); ++j) { mvatoms.push_back(*j); } tvmv = mass_c(mvatoms, molmv); // center the molecule molmv.Translate(-tvmv); //Find the rotation matrix size = mvatoms.size(); if (size != refatoms.size()) { err = program_name; err += ": Error: not the same number of SMART atoms"; ThrowError(err); exit(-1); } double *mvcoor = new double[size*3]; for(c=0; c < size; ++c) { atom = molmv.GetAtom(mvatoms[c]); mvcoor[c*3] = atom->x(); mvcoor[c*3+1] = atom->y(); mvcoor[c*3+2] = atom->z(); } // quaternion fit qtrfit(refcoor, mvcoor, size, rmatrix); //rotate all the atoms molmv.Rotate(rmatrix); // update mvcoor after rotation for(c=0; c < size; ++c) { atom = molmv.GetAtom(mvatoms[c]); mvcoor[c*3] = atom->x(); mvcoor[c*3+1] = atom->y(); mvcoor[c*3+2] = atom->z(); } rmsd = calc_rms(refcoor,mvcoor,size); if ( rmsd < best_rmsd ) { best_rmsd = rmsd; best_mvatoms.clear(); best_mvatoms.resize(mvatoms.size()); best_mvatoms = mvatoms; } delete[] mvcoor; } // loop through matches // Refit molecule using best match mvatoms.clear(); mvatoms.resize(best_mvatoms.size()); mvatoms = best_mvatoms; tvmv = mass_c(mvatoms, molmv); // center the molecule molmv.Translate(-tvmv); //Find the rotation matrix size = mvatoms.size(); if (size != refatoms.size()) { err = program_name; err += ": Error: not the same number of SMART atoms"; ThrowError(err); exit(-1); } double *mvcoor = new double[size*3]; for(c=0; cx(); mvcoor[c*3+1] = atom->y(); mvcoor[c*3+2] = atom->z(); } // quaternion fit qtrfit(refcoor, mvcoor, size, rmatrix); //rotate all the atoms molmv.Rotate(rmatrix); for(c=0; cx(); mvcoor[c*3+1] = atom->y(); mvcoor[c*3+2] = atom->z(); } rmsd = calc_rms(refcoor,mvcoor,size); char rmsd_string[80]; sprintf(rmsd_string,"%f", best_rmsd); OBPairData *dp = new OBPairData; string field_name = "RMSD"; dp->SetAttribute(field_name); dp->SetValue(rmsd_string); dp->SetOrigin(external); molmv.SetData(dp); cerr << "RMSD: " << rmsd_string << endl; //translate the rotated molecule molmv.Translate(tvref); delete[] mvcoor; } conv.Write(&molmv,&cout); } delete[] refcoor; return(0); } /////////////////////////////////////////////////////////////////////////////// //find the center of mass of a list of atoms vector3 mass_c( vector &aindex, OBMol &mol) { vector3 center(0,0,0); vector< int >::iterator j; OBAtom *atom; for(j= aindex.begin(); j != aindex.end(); ++j) { atom = mol.GetAtom(*j); center += atom->GetVector(); } center /= (float) aindex.size(); return (center); } /* obfit man page*/ /** \page obfit superimpose two molecules based on a pattern * * \n * \par SYNOPSIS * * \b obfit \ \ * * \par DESCRIPTION * * Superimpose two molecules using a quaternion fit. The atoms used to fit the * two molecules are defined by the SMARTS pattern given by the user. It is * useful to align congeneric series of molecules on a common structural * scaffold for 3D-QSAR studies. It can also be useful for displaying the * results of conformational generation. * \n\n * Any molecules matching the supplied SMARTS pattern will be rotated and * translated to provide the smallest possible RMSD between the matching * regions. If a molecule does not match the SMARTS pattern, it will be output * with no transformation. * * \par EXAMPLES * - Align all the molecules in 'moving.sdf' on a single molecule of 'fixed.sdf' * by superimposing them on its N-methylpiperidyl portion \n * obfit "[nh]1c2c(=O)n(C)c(=O)n(C)c2cc1" testref.sdf testmv.sdf * * \par AUTHORS * * The obfit program was contributed by \b Fabien \b Fontaine. * * Open Babel is currently maintained by \b Geoff \b Hutchison, \b Chris \b Morley and \b Michael \b Banck. * * For more contributors to Open Babel, see http://openbabel.sourceforge.net/THANKS.shtml * * \par COPYRIGHT * Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. * Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison \n \n * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation version 2 of the License.\n \n * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * \par SEE ALSO * The web pages for Open Babel can be found at http://openbabel.sourceforge.net/ \n * A guide for constructing SMARTS patterns can be found at http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html **/