Source code for chemicalchecker.util.parser.converter

"""Standardize molecule and convert between identifier."""
import json
from six.moves.urllib.request import urlopen
from six.moves.urllib.parse import quote

from chemicalchecker.util import logged


class ConversionError(Exception):
    """Raised when a molecule or identifier cannot be converted.

    Attributes:
        idx: The input (SMILES, InChI, name, ...) that failed to convert.
        reason: The short failure description supplied by the caller.
    """

    def __init__(self, message, idx):
        """Build the error from a failure description and the failing input.

        Args:
            message: Short description of what went wrong.
            idx: The input that could not be converted.
        """
        self.idx = idx
        self.reason = message
        # Name this class (not ``Exception``) so the full MRO is initialised.
        super(ConversionError, self).__init__(
            "Cannot convert: %s Message: %s" % (idx, message))

    def __reduce__(self):
        """Support copy/pickle by rebuilding from the two original arguments.

        ``args`` only holds the formatted text, so the default reduction
        would call ``ConversionError(text)`` and fail on the missing ``idx``.
        """
        return (self.__class__, (self.reason, self.idx))
@logged
class Converter(object):
    """Standardize molecules and convert between chemical identifiers.

    Instance methods rely on RDKit (``self.Chem``, ``self.scaffold``) and on
    the ChEMBL structure pipeline (``self.standardize``), both imported in
    ``__init__``. Static methods query remote services (PubChem, NCI CACTUS,
    UniChem) and can be called without creating an instance.

    NOTE: ``Converter.__log`` used below is name-mangled by Python to
    ``Converter._Converter__log``; it is presumably injected by the
    ``@logged`` decorator, which is defined outside this file.
    """

    def __init__(self):
        """Import the optional chemistry back-ends and bind them to ``self``.

        Raises:
            ImportError: If rdkit, chembl_structure_pipeline or pubchempy
                cannot be imported.
        """
        try:
            import rdkit.Chem as Chem
            from rdkit.Chem.Scaffolds import MurckoScaffold
        except ImportError:
            raise ImportError("requires rdkit https://www.rdkit.org/")
        self.Chem = Chem
        self.scaffold = MurckoScaffold
        try:
            from chembl_structure_pipeline.standardizer import standardize_mol
        except ImportError:
            raise ImportError("requires chembl_structure_pipeline")
        self.standardize = standardize_mol
        self.pcp = Converter._import_pubchempy()

    @staticmethod
    def _import_pubchempy():
        """Import and return the ``pubchempy`` module.

        Static methods have no ``self.pcp`` to rely on, so they obtain the
        module through this helper instead.

        Raises:
            ImportError: If pubchempy is not installed.
        """
        try:
            import pubchempy
        except ImportError:
            raise ImportError("requires pubchempy")
        return pubchempy

    @staticmethod
    def _fetch_text(url):
        """Return the body served at ``url`` as stripped, decoded text.

        The response is always closed, even when reading fails.

        Args:
            url: Address to fetch.
        """
        response = urlopen(url)
        try:
            return response.read().rstrip().decode()
        finally:
            response.close()

    def smiles_to_scaffold(self, smiles, generic=False):
        """From SMILES to the SMILES of its Murcko scaffold.

        Args:
            smiles: SMILES string of the molecule.
            generic: When True, the scaffold is additionally passed through
                ``MakeScaffoldGeneric`` before being written back to SMILES.

        Returns:
            The scaffold as a SMILES string.
        """
        scaffold_smiles = self.scaffold.MurckoScaffoldSmiles(smiles)
        if generic:
            scaffold_mol = self.scaffold.MakeScaffoldGeneric(
                self.Chem.MolFromSmiles(scaffold_smiles))
            scaffold_smiles = self.Chem.MolToSmiles(scaffold_mol)
        return scaffold_smiles

    def smiles_to_inchi(self, smiles):
        """From SMILES to InChIKey and InChI.

        The molecule is standardized before the InChI is generated, and the
        resulting InChI is parsed back once as a sanity check.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            A ``(inchikey, inchi)`` tuple.

        Raises:
            ConversionError: If any step fails or returns an empty result.
        """
        mol = self.Chem.MolFromSmiles(smiles)
        if not mol:
            raise ConversionError("MolFromSmiles returned None", smiles)
        try:
            mol = self.standardize(mol)
        except Exception as ex:
            raise ConversionError("'standardize' exception: %s" % ex, smiles)
        inchi = self.Chem.rdinchi.MolToInchi(mol)[0]
        if not inchi:
            raise ConversionError("'MolToInchi' returned None.", smiles)
        inchikey = self.Chem.rdinchi.InchiToInchiKey(inchi)
        if not inchikey:
            raise ConversionError("'InchiToInchiKey' returned None", smiles)
        try:
            # Round-trip check only: the parsed molecule itself is not needed.
            self.Chem.rdinchi.InchiToMol(inchi)[0]
        except Exception as ex:
            raise ConversionError("'InchiToMol' exception: %s" % ex, smiles)
        return inchikey, inchi

    def inchi_to_smiles(self, inchi):
        """From InChI to isomeric SMILES of the standardized molecule.

        Args:
            inchi: InChI string.

        Returns:
            The SMILES string.

        Raises:
            ConversionError: If the InChI cannot be parsed or standardized.
        """
        mol = self.inchi_to_mol(inchi)
        return self.Chem.MolToSmiles(mol, isomericSmiles=True)

    def inchi_to_inchikey(self, inchi):
        """From InChI to InChIKey.

        Args:
            inchi: InChI string.

        Returns:
            The InChIKey computed by RDKit.

        Raises:
            ConversionError: If the InChIKey cannot be computed.
        """
        try:
            inchi_ascii = inchi.encode('ascii', 'ignore')
            return self.Chem.rdinchi.InchiToInchiKey(inchi_ascii)
        except Exception as ex:
            raise ConversionError(
                "'InchiToInchiKey' exception: %s" % ex, inchi)

    def inchi_to_mol(self, inchi):
        """From InChI to a standardized molecule.

        Args:
            inchi: InChI string.

        Returns:
            The molecule returned by ``self.standardize``.

        Raises:
            ConversionError: If the InChI cannot be parsed or standardized.
        """
        try:
            inchi_ascii = inchi.encode('ascii', 'ignore')
            mol = self.Chem.rdinchi.InchiToMol(inchi_ascii)[0]
        except Exception as ex:
            raise ConversionError("'InchiToMol' exception: %s" % ex, inchi)
        try:
            return self.standardize(mol)
        except Exception as ex:
            raise ConversionError("'standardize' exception: %s" % ex, inchi)

    @staticmethod
    def ctd_to_smiles(ctdid):
        """From CTD identifier to SMILES via the PubChem REST service.

        The CTD identifier is first mapped to a PubChem CID, then the
        ``CanonicalSMILES`` property of that CID is fetched.

        Args:
            ctdid: Comparative Toxicogenomics Database identifier.

        Returns:
            The SMILES string returned by PubChem.

        Raises:
            ConversionError: If either request fails.
        """
        # Step 1: CTD identifier -> PubChem CID. The URL is concatenated (not
        # %-formatted) because it contains literal ``%20`` escapes.
        try:
            url = ('http://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/'
                   'sourceid/Comparative%20Toxicogenomics%20Database/'
                   + ctdid + '/cids/TXT/')
            pubchemcid = Converter._fetch_text(url)
        except Exception as ex:
            Converter.__log.warning(str(ex))
            raise ConversionError(
                "Cannot fetch PubChemID CID from CTD", ctdid)
        # Step 2: PubChem CID -> SMILES.
        try:
            url = ('http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/'
                   'cid/%s/property/CanonicalSMILES/TXT/' % pubchemcid)
            return Converter._fetch_text(url)
        except Exception as ex:
            Converter.__log.warning(str(ex))
            raise ConversionError(
                "Cannot fetch SMILES from PubChemID CID", pubchemcid)

    @staticmethod
    def chemical_name_to_smiles(chem_name):
        """From Chemical Name to SMILES via cactus.nci or pubchem.

        CACTUS is tried first; PubChem is the fallback.

        Args:
            chem_name: Chemical name to resolve.

        Returns:
            The SMILES string.

        Raises:
            ConversionError: If neither provider yields a result.
        """
        smiles = Converter._chemical_name_to_smiles_cactus(quote(chem_name))
        if smiles is not None:
            return smiles
        smiles = Converter._chemical_name_to_smiles_pubchem(chem_name)
        if smiles is None:
            raise ConversionError(
                "Cannot fetch SMILES from Chemical Name", chem_name)
        return smiles

    @staticmethod
    def chemical_name_to_inchi(chem_name):
        """From Chemical Name to InChI via cactus.nci or pubchem.

        CACTUS is tried first; PubChem is the fallback.

        Args:
            chem_name: Chemical name to resolve.

        Returns:
            The InChI string.

        Raises:
            ConversionError: If neither provider yields a result.
        """
        inchi = Converter._chemical_name_to_inchi_cactus(quote(chem_name))
        if inchi is not None:
            return inchi
        inchi = Converter._chemical_name_to_inchi_pubchem(chem_name)
        if inchi is None:
            raise ConversionError(
                "Cannot fetch InChI from Chemical Name", chem_name)
        return inchi

    @staticmethod
    def _chemical_name_via_cactus(chem_name, representation, label):
        """Resolve a URL-quoted name through the CACTUS structure service.

        Args:
            chem_name: Chemical name, already URL-quoted by the caller.
            representation: Last URL path segment (``smiles``/``stdinchi``).
            label: Human-readable target name used in the warning message.

        Returns:
            The response text, or None (after logging a warning) on failure.
        """
        try:
            url = ('http://cactus.nci.nih.gov/chemical/'
                   'structure/%s/%s' % (chem_name, representation))
            return Converter._fetch_text(url)
        except Exception:
            Converter.__log.warning(
                "Cannot convert Chemical Name "
                "to %s (cactus.nci): %s" % (label, chem_name))
            return None

    @staticmethod
    def _chemical_name_via_pubchem(chem_name, attribute, label):
        """Resolve a chemical name through PubChemPy.

        Args:
            chem_name: Chemical name to look up.
            attribute: Attribute read from the first compound returned.
            label: Human-readable target name used in the warning message.

        Returns:
            The attribute value of the first hit, or None (after logging a
            warning) when there is no hit or the lookup fails.
        """
        try:
            pcp = Converter._import_pubchempy()
            cpds = pcp.get_compounds(chem_name, 'name')
            if not cpds:
                Converter.__log.warning(
                    "Cannot convert Chemical Name "
                    "to %s (pubchem): %s" % (label, chem_name))
                return None
            if len(cpds) > 1:
                Converter.__log.warning(
                    "Multiple CIDs found, using first: %s" % str(cpds))
            return getattr(cpds[0], attribute)
        except Exception:
            # Best effort: callers treat None as "not found".
            Converter.__log.warning(
                "Cannot convert Chemical Name "
                "to %s (pubchem): %s" % (label, chem_name))
            return None

    @staticmethod
    def _chemical_name_to_smiles_cactus(chem_name):
        """From (URL-quoted) chemical name to SMILES via CACTUS, or None."""
        return Converter._chemical_name_via_cactus(
            chem_name, 'smiles', 'SMILES')

    @staticmethod
    def _chemical_name_to_inchi_cactus(chem_name):
        """From (URL-quoted) chemical name to InChI via CACTUS, or None."""
        return Converter._chemical_name_via_cactus(
            chem_name, 'stdinchi', 'InChI')

    @staticmethod
    def _chemical_name_to_smiles_pubchem(chem_name):
        """From chemical name to SMILES via PubChem, or None."""
        return Converter._chemical_name_via_pubchem(
            chem_name, 'isomeric_smiles', 'SMILES')

    @staticmethod
    def _chemical_name_to_inchi_pubchem(chem_name):
        """From chemical name to InChI via PubChem, or None."""
        return Converter._chemical_name_via_pubchem(
            chem_name, 'inchi', 'InChI')

    @staticmethod
    def _resove_inchikey_unichem(inchikey):
        """Resolve an InChIKey to its InChI through the UniChem REST API.

        Args:
            inchikey: InChIKey to resolve.

        Returns:
            The ``standardinchi`` field of the single result.

        Raises:
            ConversionError: On request failure, on an error payload (a JSON
                object), or when the result is not exactly one usable entry.
        """
        # Bound before the ``try`` so the handler can always format it.
        url = None
        try:
            url = ('https://www.ebi.ac.uk/unichem/rest/inchi/%s'
                   % quote(inchikey))
            res = json.loads(Converter._fetch_text(url))
        except Exception:
            raise ConversionError(
                "No response from unichem: %s" % url, inchikey)
        if isinstance(res, dict):
            # A JSON object (rather than a list) is reported as an error.
            err_msg = '; '.join(['%s: %s' % (k, v) for k, v in res.items()])
            raise ConversionError(err_msg, inchikey)
        if not isinstance(res, list):
            raise ConversionError(
                'Unexpected response from unichem: %s' % str(res), inchikey)
        if len(res) != 1 or 'standardinchi' not in res[0]:
            raise ConversionError(
                'No results from unichem: %s' % str(res), inchikey)
        return res[0]['standardinchi']

    @staticmethod
    def _resove_inchikey_cactus(inchikey):
        """Resolve an InChIKey to its InChI through the CACTUS service.

        Args:
            inchikey: InChIKey to resolve.

        Returns:
            The response text.

        Raises:
            ConversionError: If the request fails.
        """
        # Bound before the ``try`` so the handler can always format it.
        url = None
        try:
            url = ("https://cactus.nci.nih.gov/"
                   "chemical/structure/%s/stdinchi" % quote(inchikey))
            return Converter._fetch_text(url)
        except Exception:
            raise ConversionError(
                "No response from cactus: %s" % url, inchikey)

    @staticmethod
    def _resove_inchikey_pubchem(inchikey):
        """Resolve an InChIKey to its InChI through PubChemPy.

        When several compounds match, the first one is used.

        Args:
            inchikey: InChIKey to resolve.

        Returns:
            The ``inchi`` attribute of the first compound found.

        Raises:
            ConversionError: If the lookup fails or returns no compound.
        """
        try:
            pcp = Converter._import_pubchempy()
            cpds = pcp.get_compounds(inchikey, 'inchikey')
        except Exception as ex:
            Converter.__log.warning(str(ex))
            raise ConversionError(
                "No response from pubchem: %s" % ex, inchikey)
        if not cpds:
            raise ConversionError("No results from pubchem", inchikey)
        return cpds[0].inchi

    @staticmethod
    def inchikey_to_inchi(inchikey, local_db=True, save_local=True):
        """From InChIKey to InChI.

        The local database is consulted first because it is the fastest
        option. When the key is not found there, the remote providers are
        tried in a fixed order (unichem, cactus, pubchem) and the first
        non-empty answer wins.

        Args:
            inchikey: InChIKey to resolve.
            local_db: Look the key up in the local ``Molecule`` table first.
            save_local: Store a remotely resolved InChI in the ``Molecule``
                table.

        Returns:
            The InChI string.

        Raises:
            ConversionError: If no source can resolve the key.
        """
        if local_db:
            from chemicalchecker.database import Molecule
            res = Molecule.get_inchikey_inchi_mapping([inchikey])
            if res[inchikey] is not None:
                return res[inchikey]
        # A tuple (not a dict) keeps provider precedence deterministic on
        # every Python version.
        resolvers = (
            ('unichem', Converter._resove_inchikey_unichem),
            ('cactus', Converter._resove_inchikey_cactus),
            ('pubchem', Converter._resove_inchikey_pubchem),
        )
        inchi = None
        for provider, func in resolvers:
            try:
                inchi = func(inchikey)
            except Exception:
                Converter.__log.debug(
                    'InChIKey %s not found via %s' % (inchikey, provider))
                continue
            if inchi:
                break
        if not inchi:
            raise ConversionError('Unable to resolve', inchikey)
        if save_local:
            from chemicalchecker.database import Molecule
            Molecule.add_bulk([[inchikey, inchi]])
        return inchi