How to use molecule_file method in molecule

Best Python code snippet using molecule_python

molecules_to_cov.py

Source:molecules_to_cov.py

1"""2Given a molecule file (as output by CAMPAREE), generate a coverage file3"""4import argparse5import collections6import re7import numpy8parser = argparse.ArgumentParser(description="Given a molecule file (as output by CAMPAREE), generate a coverage file in the bedGraph format")9parser.add_argument("molecule_file", help="path to the input molecule file")10parser.add_argument("output_file", help="prefix for the output coverage files, two are output as output_file.forward.cov and output_file.reverse.cov")11parser.add_argument("-m", "--max_chromosome_size", help="size of largest chromosome in megabases (250 for humans, 196 for mouse), this can be an overestimate, determines memory use", default=260, type=int)12args = parser.parse_args()13cigar_re = re.compile("[\d]+[MINDSH]")14def make_chrom():15    print(f"Creating the {len(coverages)+1} chromosome")16    return numpy.zeros(args.max_chromosome_size * 1_000_000, dtype="int32")17coverages = collections.defaultdict(make_chrom)18with open(args.molecule_file) as molecule_file:19    for line in molecule_file:20        if line.startswith("#"):21            continue #Skip comment/header line22        transcript_id, chrom, start, cigar, ref_start, ref_cigar, strand, sequence = line.split('\t')23        # Really just need the ref_start, ref_cigar, chromosome and strand for coverage24        ref_start = int(ref_start)25        cigar_parts = cigar_re.findall(ref_cigar)26        reference_idx = ref_start27        for part in cigar_parts:28            match_length = int(part[:-1])29            match_type = part[-1]30            if match_type == "M":31                # Count the match and move along the reference32                coverages[chrom,strand][reference_idx:reference_idx+match_length] += 133                reference_idx += match_length34            elif match_type in "ND":35                # No match but we still move positions on the reference36                reference_idx += match_length37            else:38                # do nothing39                pass40with open(args.output_file + ".forward.cov", "w") as fwd_file, open(args.output_file + ".reverse.cov", "w") as rev_file:41    # Output headers42    fwd_header = f'track type=bedGraph name="Coverage  {args.molecule_file} forward strand" description="Coverage for {args.molecule_file} forward strand" visibility=full color=0,0,255 priority=20\n'43    rev_header = f'track type=bedGraph name="Coverage  {args.molecule_file} reverse strand" description="Coverage for {args.molecule_file} reverse strand" visibility=full color=255,0,0 priority=20\n'44    fwd_file.write(fwd_header)45    rev_file.write(rev_header)46    # Output the coverage of each strand of each chromosome47    # sorted alphabetically48    for chrom,strand in sorted(coverages.keys()):49        print(f"Starting chrom {chrom} strand {strand}")50        coverage = coverages[chrom,strand]51        # We allocated too much space for every chromosome and most didn't use it52        # unused is left as zeros, so we can just drop those on the end53        coverage = numpy.trim_zeros(coverage, 'b')54        print(f"\t{len(coverage)} bases to output")55        if strand == "+":56            cov_file = fwd_file57        else:58            cov_file = rev_file59        block_start = None60        block_height = None61        for i, height in enumerate(coverage):62            if block_height is None:63                # Start our first block64                block_start = i - 1 # Convert to 0-based for UCSC browser65                block_height = height66            elif block_height == height:67                # Still part of the existing block68                continue69            else:70                # Convert to 0-based for UCSC, so this gives the interval (block_start, block_end)71                # which is half-open 0 based and hence goes up to but not including the 1-based position i72                block_end = i - 173                # Don't output anything for 0s, but for everything else output the block74                if block_height != 0:75                    # Write out the block to the bed file76                    cov_file.write('\t'.join([chrom,77                                            str(block_start),78                                            str(block_end),79                                            str(block_height)]) + "\n")80                # Start a new block81                block_start = i - 1# Convert to 0-based for UCSC browser82                block_height = height83        if block_height is not None and block_height != 0:84            # Output the last block85                block_end = i - 1 # Convert to 0-based for UCSC browser86                # Write out the block to the bed file87                cov_file.write('\t'.join([chrom,88                                        str(block_start),89                                        str(block_end),...

load_mol.py

Source:load_mol.py

1from sys import stderr2from io import StringIO3import os4import numpy as np5from rdkit import Chem6from rdkit.Chem import AllChem7class MoleculeLoadException(Exception):8    def __init__(self, *args, **kwargs):9        Exception.__init__(*args, **kwargs)10def get_xyz_from_mol(mol):11    xyz = np.zeros((mol.GetNumAtoms(), 3))12    conf = mol.GetConformer()13    for i in range(conf.GetNumAtoms()):14        position = conf.GetAtomPosition(i)15        xyz[i, 0] = position.x16        xyz[i, 1] = position.y17        xyz[i, 2] = position.z18    return (xyz)19def add_hydrogens_to_mol(mol):20    """21    Add hydrogens to a molecule object22    TODO (LESWING) see if there are more flags to add here for default23    :param mol: Rdkit Mol24    :return: Rdkit Mol25    """26    molecule_file = None27    try:28        pdbblock = Chem.MolToPDBBlock(mol)29        pdb_stringio = StringIO()30        pdb_stringio.write(pdbblock)31        pdb_stringio.seek(0)32        import pdbfixer33        fixer = pdbfixer.PDBFixer(pdbfile=pdb_stringio)34        fixer.findMissingResidues()35        fixer.findMissingAtoms()36        fixer.addMissingAtoms()37        fixer.addMissingHydrogens(7.4)38        hydrogenated_io = StringIO()39        import simtk40        simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)41        hydrogenated_io.seek(0)42        return Chem.MolFromPDBBlock(hydrogenated_io.read(), sanitize=False, removeHs=False)43    except ValueError as e:44        print('Unable to add hydrogens %s' % e, file=stderr)45        raise MoleculeLoadException(e)46    finally:47        try:48            os.remove(molecule_file)49        except (OSError, TypeError):50            pass51def compute_charges(mol):52    try:53        AllChem.ComputeGasteigerCharges(mol)54    except Exception as e:55        print('Unable to compute charges for mol', file=stderr)56        raise MoleculeLoadException(e)57    return mol58def load_molecule(molecule_file, add_hydrogens=True, calc_charges=True, sanitize=False):59    if '.mol2' in molecule_file:60        my_mol = Chem.MolFromMol2File(molecule_file, sanitize=False, removeHs=False)61    elif '.sdf' in molecule_file:62        suppl = Chem.SDMolSupplier(str(molecule_file), sanitize=False)63        my_mol = suppl[0]64    elif '.pdb' in molecule_file:65        my_mol = Chem.MolFromPDBFile(str(molecule_file), sanitize=False, removeHs=False)66    else:67        raise ValueError('Unrecognized file type')68    if my_mol is None:69        raise ValueError('Unable to read non None Molecule Object')70    if add_hydrogens or calc_charges:71        my_mol = add_hydrogens_to_mol(my_mol)72    if sanitize:73        Chem.SanitizeMol(my_mol)74    if calc_charges:75        compute_charges(my_mol)76    xyz = get_xyz_from_mol(my_mol)...

test_predictor.py

Source:test_predictor.py

1from pathlib import Path2from click.testing import CliRunner3import bondnet4from bondnet.prediction.predictor import (5    predict_single_molecule,6    predict_multiple_molecules,7    predict_by_reactions,8)9from bondnet.scripts.predict_cli import cli10def test_predict_single_molecule():11    predict_single_molecule(model_name="bdncm", molecule="CC")12    predict_single_molecule(model_name="pubchem", molecule="CC")13def test_predict_multiple_molecules():14    prefix = Path(bondnet.__file__).parent.joinpath("scripts", "examples", "predict")15    molecule_file = prefix.joinpath("molecules.sdf")16    charge_file = prefix.joinpath("charges.txt")17    predict_multiple_molecules(18        model_name="bdncm",19        molecule_file=molecule_file,20        charge_file=charge_file,21        out_file="/tmp/bde.sdf",22        format="sdf",23    )24    predict_multiple_molecules(25        model_name="pubchem",26        molecule_file=molecule_file,27        charge_file=None,28        out_file="/tmp/bde.sdf",29        format="sdf",30    )31def test_predict_by_reaction():32    prefix = Path(bondnet.__file__).parent.joinpath("scripts", "examples", "predict")33    molecule_file = prefix.joinpath("molecules.sdf")34    rxn_file = prefix.joinpath("reactions.csv")35    charge_file = prefix.joinpath("charges.txt")36    predict_by_reactions(37        model_name="bdncm",38        molecule_file=molecule_file,39        reaction_file=rxn_file,40        charge_file=charge_file,41        out_file="/tmp/bde.csv",42        format="sdf",43    )44    predict_by_reactions(45        model_name="bdncm",46        molecule_file=molecule_file,47        reaction_file=rxn_file,48        charge_file=None,49        out_file="/tmp/bde.csv",50        format="sdf",51    )52def test_cli():53    runner = CliRunner()54    result = runner.invoke(cli, ["single", "CC", "0"])55    molecule_file = Path(bondnet.__file__).parent.joinpath(56        "prediction", "examples", "molecules.sdf"57    )...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.