Best Python code snippet using autotest_python
load_datasets.py
Source:load_datasets.py  
# NOTE: this listing starts mid-definition. The fragment below is the visible
# tail of the ``BBBP`` dataset class; its ``class`` line and the first
# ``__init__`` parameters are cut off, so it is preserved verbatim as a comment
# instead of being reconstructed by guesswork.
#
# ...
#                  log_every=1000,
#                  cache_file_path='./bbbp_dglgraph.bin',
#                  n_jobs=1):
#         self._url = 'dataset/bbbp.zip'
#         data_path = get_download_dir() + '/bbbp.zip'
#         dir_path = get_download_dir() + '/bbbp'
#         download(_get_dgl_url(self._url), path=data_path, overwrite=False)
#         extract_archive(data_path, dir_path)
#         df = pd.read_csv(dir_path + '/BBBP.csv')
#         super(BBBP, self).__init__(df=df,
#                                    smiles_to_graph=smiles_to_graph,
#                                    smiles_column='smiles',
#                                    cache_file_path=cache_file_path,
#                                    task_names=['p_np'],
#                                    load=load,
#                                    log_every=log_every,
#                                    init_mask=True,
#                                    n_jobs=n_jobs)
#         self.load_full = False
#         self.names = df['name'].tolist()
#         self.names = [self.names[i] for i in self.valid_ids]
#     def __getitem__(self, item):
#         if self.load_full:
#             return self.smiles[item], self.graphs[item], self.labels[item], \
#                    self.mask[item], self.names[item]
#         else:
#             return self.smiles[item], self.graphs[item], self.labels[item], self.mask[item]


def _read_zipped_csv(url, name, csv_name):
    """Download ``<name>.zip``, extract it and load one CSV file from it.

    Args:
        url: Relative dataset URL; resolved with ``_get_dgl_url``.
        name: Base name used for both the archive (``<name>.zip``) and the
            extraction directory (``<name>``) under ``get_download_dir()``.
        csv_name: File name of the CSV inside the extraction directory.

    Returns:
        The ``pandas.DataFrame`` read from the extracted CSV.
    """
    data_path = get_download_dir() + '/' + name + '.zip'
    dir_path = get_download_dir() + '/' + name
    download(_get_dgl_url(url), path=data_path, overwrite=False)
    extract_archive(data_path, dir_path)
    return pd.read_csv(dir_path + '/' + csv_name)


def _take_valid(values, valid_ids):
    """Return ``values[i]`` for every ``i`` in ``valid_ids``, in order."""
    return [values[i] for i in valid_ids]


class BACE(MoleculeCSVDataset):
    """Dataset built from ``bace.csv`` (SMILES column ``mol``, task ``Class``).

    All constructor arguments are forwarded unchanged to
    ``MoleculeCSVDataset.__init__``. When ``load_full`` is set to True,
    ``__getitem__`` additionally returns the ``CID`` value of the item.
    """

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./bace_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/bace.zip'
        df = _read_zipped_csv(self._url, 'bace', 'bace.csv')
        super(BACE, self).__init__(df=df,
                                   smiles_to_graph=smiles_to_graph,
                                   smiles_column='mol',
                                   cache_file_path=cache_file_path,
                                   task_names=['Class'],
                                   load=load,
                                   log_every=log_every,
                                   init_mask=True,
                                   n_jobs=n_jobs)
        self.load_full = False
        # valid_ids comes from the base class -- presumably the row indices
        # that were kept; TODO confirm against MoleculeCSVDataset.
        self.ids = _take_valid(df['CID'].tolist(), self.valid_ids)

    def __getitem__(self, item):
        """Return (smiles, graph, labels, mask), plus the CID if ``load_full``."""
        sample = (self.smiles[item], self.graphs[item],
                  self.labels[item], self.mask[item])
        if self.load_full:
            return sample + (self.ids[item],)
        return sample


class MUV(MoleculeCSVDataset):
    """Dataset built from ``muv.csv`` (SMILES column ``smiles``).

    The ``mol_id`` column is removed from the frame before it is handed to
    the base class and kept separately in ``self.ids``. When ``load_full`` is
    True, ``__getitem__`` additionally returns that id.
    """

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./muv_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/muv.zip'
        df = _read_zipped_csv(self._url, 'muv', 'muv.csv')
        # Captured before the base initialiser runs, filtered afterwards.
        self.ids = df['mol_id'].tolist()
        self.load_full = False
        df = df.drop(columns=['mol_id'])
        super(MUV, self).__init__(df=df,
                                  smiles_to_graph=smiles_to_graph,
                                  smiles_column='smiles',
                                  cache_file_path=cache_file_path,
                                  load=load,
                                  log_every=log_every,
                                  init_mask=True,
                                  n_jobs=n_jobs)
        self.ids = _take_valid(self.ids, self.valid_ids)

    def __getitem__(self, item):
        """Return (smiles, graph, labels, mask), plus mol_id if ``load_full``."""
        sample = (self.smiles[item], self.graphs[item],
                  self.labels[item], self.mask[item])
        if self.load_full:
            return sample + (self.ids[item],)
        return sample


class ClinTox(MoleculeCSVDataset):
    """Dataset built from ``clintox.csv`` (SMILES column ``smiles``)."""

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./clintox_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/clintox.zip'
        df = _read_zipped_csv(self._url, 'clintox', 'clintox.csv')
        super(ClinTox, self).__init__(df=df,
                                      smiles_to_graph=smiles_to_graph,
                                      smiles_column='smiles',
                                      cache_file_path=cache_file_path,
                                      load=load,
                                      log_every=log_every,
                                      init_mask=True,
                                      n_jobs=n_jobs)

    def __getitem__(self, item):
        """Return the (smiles, graph, labels, mask) tuple for ``item``."""
        return self.smiles[item], self.graphs[item], self.labels[item], self.mask[item]


class SIDER(MoleculeCSVDataset):
    """Dataset built from ``sider.csv`` (SMILES column ``smiles``)."""

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./sider_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/sider.zip'
        df = _read_zipped_csv(self._url, 'sider', 'sider.csv')
        super(SIDER, self).__init__(df=df,
                                    smiles_to_graph=smiles_to_graph,
                                    smiles_column='smiles',
                                    cache_file_path=cache_file_path,
                                    load=load,
                                    log_every=log_every,
                                    init_mask=True,
                                    n_jobs=n_jobs)

    def __getitem__(self, item):
        """Return the (smiles, graph, labels, mask) tuple for ``item``."""
        return self.smiles[item], self.graphs[item], self.labels[item], self.mask[item]


class ToxCast(MoleculeCSVDataset):
    """Dataset built from ``toxcast_data.csv`` (SMILES column ``smiles``)."""

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./toxcast_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/toxcast.zip'
        df = _read_zipped_csv(self._url, 'toxcast', 'toxcast_data.csv')
        super(ToxCast, self).__init__(df=df,
                                      smiles_to_graph=smiles_to_graph,
                                      smiles_column='smiles',
                                      cache_file_path=cache_file_path,
                                      load=load,
                                      log_every=log_every,
                                      init_mask=True,
                                      n_jobs=n_jobs)

    def __getitem__(self, item):
        """Return the (smiles, graph, labels, mask) tuple for ``item``."""
        return self.smiles[item], self.graphs[item], self.labels[item], self.mask[item]


class HIV(MoleculeCSVDataset):
    """Dataset built from ``HIV.csv`` (SMILES column ``smiles``).

    The ``activity`` column is removed from the frame before it is handed to
    the base class and kept separately in ``self.activity``. When
    ``load_full`` is True, ``__getitem__`` additionally returns that value.
    """

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./hiv_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/hiv.zip'
        df = _read_zipped_csv(self._url, 'hiv', 'HIV.csv')
        # Captured before the base initialiser runs, filtered afterwards.
        self.activity = df['activity'].tolist()
        self.load_full = False
        df = df.drop(columns=['activity'])
        super(HIV, self).__init__(df=df,
                                  smiles_to_graph=smiles_to_graph,
                                  smiles_column='smiles',
                                  cache_file_path=cache_file_path,
                                  load=load,
                                  log_every=log_every,
                                  init_mask=True,
                                  n_jobs=n_jobs)
        self.activity = _take_valid(self.activity, self.valid_ids)

    def __getitem__(self, item):
        """Return (smiles, graph, labels, mask), plus activity if ``load_full``."""
        sample = (self.smiles[item], self.graphs[item],
                  self.labels[item], self.mask[item])
        if self.load_full:
            return sample + (self.activity[item],)
        return sample


class Tox21(MoleculeCSVDataset):
    """Dataset built from the gzip-compressed ``tox21.csv.gz`` file.

    Unlike the zip-based datasets, the file is read directly from the
    download location without an extraction step. The ``mol_id`` column is
    removed from the frame and kept separately in ``self.id``.
    """

    def __init__(self, smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./tox21_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/tox21.csv.gz'
        data_path = get_download_dir() + '/tox21.csv.gz'
        download(_get_dgl_url(self._url), path=data_path, overwrite=False)
        df = pd.read_csv(data_path)
        # Converted to a plain list (as every sibling dataset does) so the
        # later lookup is positional rather than a Series label lookup.
        self.id = df['mol_id'].tolist()
        df = df.drop(columns=['mol_id'])
        self.load_full = False
        # NOTE(review): init_mask is not passed here although __getitem__
        # reads self.mask -- this relies on the base-class default; confirm.
        super(Tox21, self).__init__(df, smiles_to_graph,
                                    smiles_column='smiles',
                                    cache_file_path=cache_file_path,
                                    load=load, log_every=log_every, n_jobs=n_jobs)
        self.id = _take_valid(self.id, self.valid_ids)

    def __getitem__(self, item):
        """Return (smiles, graph, labels, mask), plus mol_id if ``load_full``."""
        sample = (self.smiles[item], self.graphs[item],
                  self.labels[item], self.mask[item])
        if self.load_full:
            return sample + (self.id[item],)
        return sample


class ESOL(MoleculeCSVDataset):
    """Dataset built from ``delaney-processed.csv`` with a single task column.

    The base class is initialised with ``init_mask=False`` and
    ``__getitem__`` returns no mask. The remaining CSV columns are kept as
    per-item lists and are only returned when ``load_full`` is True.
    """

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./esol_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/ESOL.zip'
        df = _read_zipped_csv(self._url, 'ESOL', 'delaney-processed.csv')
        super(ESOL, self).__init__(df=df,
                                   smiles_to_graph=smiles_to_graph,
                                   smiles_column='smiles',
                                   cache_file_path=cache_file_path,
                                   task_names=['measured log solubility in mols per litre'],
                                   load=load,
                                   log_every=log_every,
                                   init_mask=False,
                                   n_jobs=n_jobs)
        self.load_full = False
        kept = self.valid_ids
        # Compound names in PubChem
        self.compound_names = _take_valid(df['Compound ID'].tolist(), kept)
        # Estimated solubility
        self.estimated_solubility = _take_valid(
            df['ESOL predicted log solubility in mols per litre'].tolist(), kept)
        # Minimum atom degree
        self.min_degree = _take_valid(df['Minimum Degree'].tolist(), kept)
        # Molecular weight
        self.mol_weight = _take_valid(df['Molecular Weight'].tolist(), kept)
        # Number of H-Bond Donors
        self.num_h_bond_donors = _take_valid(df['Number of H-Bond Donors'].tolist(), kept)
        # Number of rings
        self.num_rings = _take_valid(df['Number of Rings'].tolist(), kept)
        # Number of rotatable bonds
        self.num_rotatable_bonds = _take_valid(df['Number of Rotatable Bonds'].tolist(), kept)
        # Polar Surface Area
        self.polar_surface_area = _take_valid(df['Polar Surface Area'].tolist(), kept)

    def __getitem__(self, item):
        """Return (smiles, graph, labels); with ``load_full`` also the extras.

        The extras are appended in this order: compound name, estimated
        solubility, minimum degree, molecular weight, H-bond donor count,
        ring count, rotatable bond count, polar surface area.
        """
        sample = (self.smiles[item], self.graphs[item], self.labels[item])
        if self.load_full:
            return sample + (
                self.compound_names[item], self.estimated_solubility[item],
                self.min_degree[item], self.mol_weight[item],
                self.num_h_bond_donors[item], self.num_rings[item],
                self.num_rotatable_bonds[item], self.polar_surface_area[item])
        return sample


class FreeSolv(MoleculeCSVDataset):
    """Dataset built from ``SAMPL.csv`` with the single task column ``expt``.

    The base class is initialised with ``init_mask=False`` and
    ``__getitem__`` returns no mask. The ``iupac`` and ``calc`` columns are
    only returned when ``load_full`` is True.
    """

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./freesolv_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/FreeSolv.zip'
        df = _read_zipped_csv(self._url, 'FreeSolv', 'SAMPL.csv')
        super(FreeSolv, self).__init__(df=df,
                                       smiles_to_graph=smiles_to_graph,
                                       smiles_column='smiles',
                                       cache_file_path=cache_file_path,
                                       task_names=['expt'],
                                       load=load,
                                       log_every=log_every,
                                       init_mask=False,
                                       n_jobs=n_jobs)
        self.load_full = False
        # Iupac names
        self.iupac_names = _take_valid(df['iupac'].tolist(), self.valid_ids)
        # Calculated hydration free energy
        self.calc_energy = _take_valid(df['calc'].tolist(), self.valid_ids)

    def __getitem__(self, item):
        """Return (smiles, graph, labels), plus (iupac, calc) if ``load_full``."""
        sample = (self.smiles[item], self.graphs[item], self.labels[item])
        if self.load_full:
            return sample + (self.iupac_names[item], self.calc_energy[item])
        return sample


class Lipophilicity(MoleculeCSVDataset):
    """Dataset built from ``Lipophilicity.csv`` with the task column ``exp``.

    NOTE: the listing is truncated inside ``__init__``; only the visible part
    of the class is reproduced here.
    """

    def __init__(self,
                 smiles_to_graph=smiles_2_dgl,
                 load=False,
                 log_every=1000,
                 cache_file_path='./lipophilicity_dglgraph.bin',
                 n_jobs=1):
        self._url = 'dataset/lipophilicity.zip'
        df = _read_zipped_csv(self._url, 'lipophilicity', 'Lipophilicity.csv')
        super(Lipophilicity, self).__init__(df=df,
                                            smiles_to_graph=smiles_to_graph,
                                            smiles_column='smiles',
                                            cache_file_path=cache_file_path,
                                            task_names=['exp'],
                                            load=load,
                                            log_every=log_every,
                                            init_mask=False,
                                            n_jobs=n_jobs)
        self.load_full = False
        # ChEMBL ids
        # ... (the remainder of this class is cut off in the listing)

# ...mmap_libt0.4_pp0_fp2.0_3.py
Source:mmap_libt0.4_pp0_fp2.0_3.py  
...10import zipfile11from six.moves import urllib12from . import config13logger = logging.getLogger(__name__)14def get_download_dir():15    """Return the directory in which downloaded and converted datasets are stored.16    This directory is defined by the environment variable ``CHAINER_DATASET_ROOT``.17    If the environment variable is not specified, it defaults to ``$HOME/.chainer/dataset``.18    Returns:19        str: The path to the download directory.20    """21    return config.get_download_dir()22def get_dataset_directory(dataset_name):23    """Return the directory in which the given dataset is stored.24    Args:25        dataset_name (str): The name of the dataset.26    Returns:27        str: The path to the dataset directory.28    """...Learn to execute automation testing from scratch with LambdaTest Learning Hub. From setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios, LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g., Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
