import os
import shutil
import tempfile
from chemicalchecker.util import psql
from chemicalchecker.util.pipeline import BaseTask
from chemicalchecker.util import logged, HPC
# SQL statements used to (re)build the two tables handled by this module:
# ``public.libraries`` and ``public.library_description``.
#
# We got these strings by doing: pg_dump -t 'scores' --schema-only mosaic
# -h aloy-dbsrv
# NOTE(review): the command above names the 'scores' table, while the
# statements below define 'libraries' / 'library_description'; presumably the
# same command was run for those tables -- confirm.

# Remove any previous copy of each table so that it can be created from
# scratch.
DROP_TABLE_DESC = "DROP TABLE IF EXISTS public.library_description"
DROP_TABLE = "DROP TABLE IF EXISTS public.libraries"
# ``libraries`` has four columns and declares no primary key; its indexes and
# foreign key are added separately by CREATE_INDEX.
CREATE_TABLE = """CREATE TABLE public.libraries (
inchikey text,
lib text,
is_bioactive smallint,
is_landmark smallint
);"""
# ``library_description`` is keyed by ``lib``.
CREATE_TABLE_DESC = """CREATE TABLE public.library_description (
lib text NOT NULL PRIMARY KEY,
name text,
description text,
urls text,
parser text,
rank integer
);"""
# One btree index per column of ``libraries``, plus the foreign key that ties
# ``libraries.lib`` to ``library_description.lib``.
CREATE_INDEX = """
CREATE INDEX inchikey_libraries_idx ON public.libraries USING btree (inchikey);
CREATE INDEX is_bioactive_libraries_idx ON public.libraries USING btree (is_bioactive);
CREATE INDEX is_landmark_libraries_idx ON public.libraries USING btree (is_landmark);
CREATE INDEX lib_libraries_idx ON public.libraries USING btree (lib);
ALTER TABLE ONLY public.libraries ADD CONSTRAINT libraries_lib_fkey FOREIGN KEY (lib) REFERENCES public.library_description(lib);
"""
# btree indexes on the ``lib``, ``name`` and ``rank`` columns of
# ``library_description``.
# NOTE(review): ``lib`` is already the PRIMARY KEY, so the explicit index on
# it looks redundant -- confirm before removing.
CREATE_INDEX_DESC = """
CREATE INDEX lib_library_description_idx ON public.library_description USING btree (lib);
CREATE INDEX name_library_description_idx ON public.library_description USING btree (name);
CREATE INDEX rank_library_description_idx ON public.library_description USING btree (rank);
"""
# Template for inserting description rows; ``%s`` is replaced through Python
# string formatting with the parenthesised list of values.
INSERT_DESC = "INSERT INTO library_description VALUES %s"
# Lists the distinct ``lib`` values currently present in ``libraries``.
CHECK = "select distinct(lib) from libraries"
@logged
class Libraries(BaseTask):
    """Task that rebuilds the ``libraries`` and ``library_description`` tables.

    The task recreates both tables, stores one description row per configured
    library, submits the generation jobs through ``HPC.submitMultiJob`` and,
    once every library is present in ``libraries``, indexes the tables and
    marks the task as ready.
    """

    def __init__(self, name=None, **params):
        """Initialize the task and validate its mandatory parameters.

        Args:
            name (str): Task name; also used as ``task_id`` when the caller
                does not provide one.
            **params: Forwarded to ``BaseTask.__init__``. The following keys
                are mandatory:

                * ``DB``: database identifier passed to the ``psql`` helpers.
                * ``CC_ROOT``: value handed to the job script as its last
                  argument.
                * ``libraries``: mapping of library id to the sequence of
                  values inserted, in order, between the id and the rank
                  in ``library_description``.

        Raises:
            Exception: If ``DB``, ``CC_ROOT`` or ``libraries`` is missing.
        """
        if params.get('task_id', None) is None:
            params['task_id'] = name
        BaseTask.__init__(self, name, **params)

        self.DB = params.get('DB', None)
        if self.DB is None:
            raise Exception('DB parameter is not set')
        self.CC_ROOT = params.get('CC_ROOT', None)
        if self.CC_ROOT is None:
            raise Exception('CC_ROOT parameter is not set')
        self.libraries = params.get('libraries', None)
        if self.libraries is None:
            raise Exception('libraries parameter is not set')

    @staticmethod
    def _sql_quote(value):
        """Return ``value`` rendered as a single-quoted SQL string literal.

        Embedded single quotes are doubled so that a value such as
        ``Tocris' set`` neither breaks nor alters the statement it is
        spliced into.
        """
        return "'%s'" % str(value).replace("'", "''")

    def run(self):
        """Rebuild the library tables and populate them.

        Steps:

        1. Drop and recreate ``libraries`` and ``library_description``.
        2. Insert one description row per configured library; the rank is
           the 1-based position of the library in ``self.libraries``.
        3. Submit one multi-job (``num_jobs`` equals the number of
           libraries) that runs ``scripts/libraries.py`` inside the
           configured singularity image, and wait for it.
        4. Check that ``libraries`` holds as many distinct ``lib`` values as
           configured libraries; if so, create the indexes, remove the job
           directory and mark the task as ready.

        Failures are re-raised unless ``self.custom_ready()`` is true, in
        which case they are only logged.

        Raises:
            Exception: On table creation or verification failure when
                ``self.custom_ready()`` is false.
        """
        script_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "scripts/libraries.py")
        universe_file = os.path.join(self.cachedir, "universe.h5")
        n_libraries = len(self.libraries)

        try:
            self.__log.info("Creating table")
            psql.query(DROP_TABLE, self.DB)
            psql.query(CREATE_TABLE, self.DB)
            psql.query(DROP_TABLE_DESC, self.DB)
            psql.query(CREATE_TABLE_DESC, self.DB)
        except Exception as e:
            self.__log.error("Error while creating libraries table")
            if not self.custom_ready():
                raise Exception(e)
            self.__log.error(e)
            return

        # The statement is assembled as text, so every value is quoted
        # through _sql_quote to keep embedded apostrophes from breaking it.
        quote = self._sql_quote
        for rank, (idname, values) in enumerate(self.libraries.items(),
                                                start=1):
            row = "(%s,%s,%d)" % (
                quote(idname), ",".join(quote(x) for x in values), rank)
            psql.query(INSERT_DESC % row, self.DB)

        self.__log.info("Genretaing libraries for " +
                        str(n_libraries) + " libraries")

        job_path = tempfile.mkdtemp(
            prefix='jobs_libraries_', dir=self.tmpdir)
        libraries_path = os.path.join(self.tmpdir, "libraries_files")
        if not os.path.exists(libraries_path):
            # Clear the umask so the directory really gets mode 0o775, and
            # always restore it, even when the directory creation fails.
            original_umask = os.umask(0)
            try:
                os.makedirs(libraries_path, 0o775)
            finally:
                os.umask(original_umask)

        params = {
            "num_jobs": n_libraries,
            "jobdir": job_path,
            "job_name": "CC_LIBRARIES",
            "elements": self.libraries,
            "wait": True,
            "memory": 20,
            "cpu": 10,
        }
        # job command
        cc_config_path = self.config.config_path
        cc_package = os.path.join(self.config.PATH.CC_REPO, 'package')
        singularity_image = self.config.PATH.SINGULARITY_IMAGE
        command = "SINGULARITYENV_PYTHONPATH={} SINGULARITYENV_CC_CONFIG={} singularity exec {} python {} <TASK_ID> <FILE> {} {} {} {}"
        command = command.format(
            cc_package, cc_config_path, singularity_image, script_path,
            universe_file, libraries_path, self.DB, self.CC_ROOT)
        # submit jobs
        cluster = HPC.from_config(self.config)
        cluster.submitMultiJob(command, **params)

        try:
            self.__log.info("Checking table")
            libs = psql.qstring(CHECK, self.DB)
            if len(libs) != n_libraries:
                message = "Not all libs were added to libraries (%d/%d)" % (
                    len(libs), n_libraries)
                if not self.custom_ready():
                    raise Exception(message)
                self.__log.error(message)
            else:
                self.__log.info("Indexing table")
                psql.query(CREATE_INDEX, self.DB)
                psql.query(CREATE_INDEX_DESC, self.DB)
                # The job directory is only removed on success.
                shutil.rmtree(job_path, ignore_errors=True)
                self.mark_ready()
        except Exception as e:
            self.__log.error("Error while checking libraries table")
            if not self.custom_ready():
                raise Exception(e)
            self.__log.error(e)

    def execute(self, context):
        """Run the libraries step.

        Args:
            context (dict): Execution context; ``context['params']['tmpdir']``
                is used as the working directory for job and output files.
        """
        self.tmpdir = context['params']['tmpdir']
        self.run()