How to use the resetall method in pytest-mock

Best Python code snippet using pytest-mock

__main__.py

Source: __main__.py (GitHub)

copy

Full Screen

#!/usr/bin/env python
"""Command line entry point of stag.

Parses the command line, validates the options required by the selected
sub-command and dispatches to the corresponding stag module.
"""

import os
import sys
import argparse
import shlex
import shutil
import time
import subprocess
import glob
import tempfile
import errno
import tarfile
import json
import pathlib

from . import __version__ as tool_version
from .helpers import bco, print_error, check_file_exists, check_file_doesnt_exists
import stag.align as align
import stag.create_db as create_db
import stag.classify as classify
import stag.check_create_db_input_files as check_create_db_input_files
import stag.correct_seq as correct_seq
import stag.unzip_db as unzip_db
import stag.classify_genome as classify_genome
import stag.train_genome as train_genome
import stag.convert_ali as convert_ali


def handle_error(error, help_f=None):
    """Report *error* on stderr and terminate the process with exit status 1.

    Args:
        error: message printed to stderr.
        help_f: optional zero-argument callable (one of the ``print_menu_*``
            functions) invoked first, so the usage text precedes the error.
    """
    if help_f:
        help_f()
    print_error()
    print(error, file=sys.stderr)
    sys.exit(1)


# ------------------------------------------------------------------------------
# print the help information
# ------------------------------------------------------------------------------
class CapitalisedHelpFormatter(argparse.HelpFormatter):
    """Help formatter that emits the usage text without a prefix by default."""

    def add_usage(self, usage, actions, groups, prefix=None):
        """Forward to ``HelpFormatter.add_usage`` using '' when no prefix is given."""
        if prefix is None:
            prefix = ''
        return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix)


def msg(name=None):
    """Return the coloured top-level usage text (program, version, commands).

    Args:
        name: unused; kept for backward compatibility.
    """
    str_msg = f'''
\00
{bco.Cyan}Program:{bco.ResetAll} stag - Supervised Taxonomic Assignment of marker Genes
{bco.Cyan}Version:{bco.ResetAll} '''+tool_version+f'''
{bco.Cyan}Usage:{bco.ResetAll} stag <command> [options]
{bco.Cyan}Command:{bco.ResetAll}
 {bco.LightGreen}-- Single gene{bco.ResetAll}
 {bco.LightBlue}train{bco.ResetAll} Train a classifier and create a database
 {bco.LightBlue}classify{bco.ResetAll} Taxonomically annotate a gene
 {bco.LightBlue}align{bco.ResetAll} Align a sequence to a hmm or infernal model
 {bco.LightBlue}create_db{bco.ResetAll} Create a database given the aligned sequences
 {bco.LightBlue}check_input{bco.ResetAll} Check the input for the train command
 {bco.LightBlue}correct_seq{bco.ResetAll} Correct sequences that are in wrong orientation
 {bco.LightBlue}convert_ali{bco.ResetAll} Convert between 1-hot-encoding and fasta, and vice versa
 {bco.LightBlue}unzip_db{bco.ResetAll} Create a directory with the content of a database
 {bco.LightGreen}-- Genome{bco.ResetAll}
 {bco.LightBlue}train_genome{bco.ResetAll} Merge classifiers of single genes
 {bco.LightBlue}classify_genome{bco.ResetAll} Taxonomically annotate a genome (predict genes, extract
 the database marker genes and classify them)
Type stag <command> to print the help for a specific command
 '''
    return str_msg


# ------------------------------------------------------------------------------
def print_menu_align():
    """Write the help text of ``stag align`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} align {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-a{bco.ResetAll} <hmmfile> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-p{bco.ResetAll} FILE protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} FILE hmmfile or cmfile to use as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE output file name {bco.LightMagenta}[stdout]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-c{bco.ResetAll} set if you are using a cmfile\n")
    sys.stderr.write(f" {bco.LightBlue}-m{bco.ResetAll} INT threshold for the number of features per sequence (percentage) {bco.LightMagenta}[0]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-t{bco.ResetAll} INT number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")
    sys.stderr.write(f"{bco.Cyan}Note:{bco.ResetAll} if -p is provided, then the alignment will be done at the level\nof the proteins and then converted to gene alignment (from -i input).\nThe order of the sequences in -i and -p should be the same.\n\n")


# ------------------------------------------------------------------------------
def print_menu_create_db():
    """Write the help text of ``stag create_db`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} create_db {bco.LightBlue}-s{bco.ResetAll} <aligned_file> {bco.LightBlue}-x{bco.ResetAll} <taxonomy_file>\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} <hmmfile> {bco.LightBlue}-o{bco.ResetAll} <output_DB> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-s{bco.ResetAll} FILE file with 1-hot encoding MSA (result from stag align) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} FILE hmmfile or cmfile to used as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-c{bco.ResetAll} set if you are using a cmfile\n")
    sys.stderr.write(f" {bco.LightBlue}-x{bco.ResetAll} FILE taxonomy file (tab separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE output file name (HDF5 format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-f{bco.ResetAll} force to rewrite output file\n")
    sys.stderr.write(f" {bco.LightBlue}-C{bco.ResetAll} FILE save intermediate cross validation results {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-p{bco.ResetAll} FILE protein sequences, if they were used for the alignment {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-e{bco.ResetAll} STR penalty for the logistic regression {bco.LightMagenta}[\"l1\"]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-E{bco.ResetAll} STR solver for the logistic regression {bco.LightMagenta}[\"liblinear\"]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-t{bco.ResetAll} INT number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
def print_menu_classify():
    """Write the help text of ``stag classify`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} classify {bco.LightBlue}-d{bco.ResetAll} <database> [{bco.LightBlue}-i{bco.ResetAll}/{bco.LightBlue}-s{bco.ResetAll}] <seq_file> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-d{bco.ResetAll} FILE database created with create_db or train {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE sequences to taxonomically annotate (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-s{bco.ResetAll} FILE aligned sequences, can be provided instead of -i {bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-p{bco.ResetAll} FILE protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-S{bco.ResetAll} FILE save intermediate alignment file {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE output file name {bco.LightMagenta}[stdout]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-l{bco.ResetAll} long output (with more information about the classification)\n")
    sys.stderr.write(f" {bco.LightBlue}-m{bco.ResetAll} INT threshold for the number of features per sequence (percentage) {bco.LightMagenta}[0]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
def print_menu_check_input():
    """Write the help text of ``stag check_input`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} check_input {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-x{bco.ResetAll} <taxonomy_file>\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} <hmmfile> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-p{bco.ResetAll} FILE protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} FILE hmmfile or cmfile to used as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-c{bco.ResetAll} set if you are using a cmfile\n")
    sys.stderr.write(f" {bco.LightBlue}-x{bco.ResetAll} FILE taxonomy file (tab separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-w{bco.ResetAll} FILE save warning messages to a file {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
def print_menu_train():
    """Write the help text of ``stag train`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} train {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-x{bco.ResetAll} <taxonomy_file>\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} <hmmfile> {bco.LightBlue}-o{bco.ResetAll} <output_DB> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-p{bco.ResetAll} FILE protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} FILE hmmfile or cmfile to used as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-c{bco.ResetAll} set if you are using a cmfile\n")
    sys.stderr.write(f" {bco.LightBlue}-x{bco.ResetAll} FILE taxonomy file (tab separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE output file name (HDF5 format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-f{bco.ResetAll} force to rewrite output file\n\n")
    sys.stderr.write(f" {bco.LightBlue}-S{bco.ResetAll} FILE save intermediate alignment file {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-C{bco.ResetAll} FILE save intermediate cross validation results {bco.LightMagenta}[None]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-t{bco.ResetAll} INT number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-m{bco.ResetAll} INT threshold for the number of features per sequence (percentage) {bco.LightMagenta}[0]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")
    sys.stderr.write(f" {bco.LightBlue}-e{bco.ResetAll} STR penalty for the logistic regression {bco.LightMagenta}[\"l1\"]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-E{bco.ResetAll} STR solver for the logistic regression {bco.LightMagenta}[\"liblinear\"]{bco.ResetAll}\n\n")
    sys.stderr.write(f"{bco.Cyan}Note:{bco.ResetAll} if -p is provided, then the alignment will be done at the level\nof the proteins and then converted to gene alignment (from -i input).\nThe order of the sequences in -i and -p should be the same.\n\n")


# ------------------------------------------------------------------------------
def print_menu_correct_seq():
    """Write the help text of ``stag correct_seq`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} correct_seq {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-a{bco.ResetAll} <hmmfile> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-a{bco.ResetAll} FILE hmmfile or cmfile to use as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE output file name {bco.LightMagenta}[stdout]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-c{bco.ResetAll} set if you are using a cmfile\n")
    sys.stderr.write(f" {bco.LightBlue}-m{bco.ResetAll} INT threshold for the number of features per sequence (percentage) {bco.LightMagenta}[5]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-t{bco.ResetAll} INT number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
def print_menu_train_genome():
    """Write the help text of ``stag train_genome`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} train_genome {bco.LightBlue}-i{bco.ResetAll} <list_gene_DBs> {bco.LightBlue}-T{bco.ResetAll} <gene_thresholds>\n {bco.LightBlue}-o{bco.ResetAll} <output_DB> {bco.LightBlue}-C{bco.ResetAll} <concat_genes_DB> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} LIST list of single gene databases to use (comma separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-T{bco.ResetAll} FILE hmm treshold for selecting the genes {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-C{bco.ResetAll} FILE stag database for the concatenated genes{bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE output file name (HDF5 format) {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-t{bco.ResetAll} INT number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
def print_menu_classify_genome():
    """Write the help text of ``stag classify_genome`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} classify_genome {bco.LightBlue}-d{bco.ResetAll} <genome_database> {bco.LightBlue}-o{bco.ResetAll} res_dir\n")
    sys.stderr.write(f" [{bco.LightBlue}-i{bco.ResetAll} <fasta_seq>/{bco.LightBlue}-D{bco.ResetAll} <directory>/{bco.LightBlue}-G{bco.ResetAll} <markers.json>] [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-d{bco.ResetAll} FILE database created with train_genome {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE genome fasta file\n")
    sys.stderr.write(f" {bco.LightBlue}-D{bco.ResetAll} DIR directory containing genome fasta files (only fasta\n files will be used)\n")
    sys.stderr.write(f" {bco.LightBlue}-G{bco.ResetAll} FILE json file pointing at a marker gene set (in lieu of a full genome)\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} DIR output directory {bco.LightMagenta}[required]{bco.ResetAll}\n")
    # The colour is reset after "[False]" like on every other option line
    # (the reset was missing here).
    sys.stderr.write(f" {bco.LightBlue}-l{bco.ResetAll} long output (with more information about the classification) {bco.LightMagenta}[False]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-r{bco.ResetAll} use all genes above the filter {bco.LightMagenta}[False]{bco.ResetAll}\n\n")
    sys.stderr.write(f" {bco.LightBlue}-t{bco.ResetAll} INT number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")


# ------------------------------------------------------------------------------
def print_menu_convert_ali():
    """Write the help text of ``stag convert_ali`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} convert_ali {bco.LightBlue}-i{bco.ResetAll} <file_in> {bco.LightBlue}-o{bco.ResetAll} <file_out> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-i{bco.ResetAll} FILE Input file, either a 1-hot-encoding created by stag align,\n")
    sys.stderr.write(f" or a fasta file of aligned sequences created by hmmalign.\n")
    sys.stderr.write(f" The input type is detected automatically {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} FILE A 1-hot-encoding if the input was fasta,\n")
    sys.stderr.write(f" or a fasta file if the input was 1-hot-encoding {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
def print_menu_unzip_db():
    """Write the help text of ``stag unzip_db`` to stderr."""
    sys.stderr.write("\n")
    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} unzip_db {bco.LightBlue}-d{bco.ResetAll} <database> {bco.LightBlue}-o{bco.ResetAll} <dir_out> [options]\n\n")
    sys.stderr.write(f" {bco.LightBlue}-d{bco.ResetAll} FILE database created with create_db or train {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-o{bco.ResetAll} DIR create a dir with the unzipped database {bco.LightMagenta}[required]{bco.ResetAll}\n")
    sys.stderr.write(f" {bco.LightBlue}-v{bco.ResetAll} INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")


# ------------------------------------------------------------------------------
# MAIN
# ------------------------------------------------------------------------------
def _build_parser():
    """Create the argument parser shared by all stag sub-commands."""
    parser = argparse.ArgumentParser(usage=msg(), formatter_class=CapitalisedHelpFormatter, add_help=False)
    parser.add_argument('command', action="store", default=None, help='mode to run stag',
                        choices=['align', 'train', 'classify', 'create_db', 'check_input', 'correct_seq', 'train_genome',
                                 'classify_genome', 'test', 'convert_ali', "unzip_db"])
    parser.add_argument('-o', action="store", dest='output', default=None, help='name of output file')
    parser.add_argument('-t', type=int, action="store", dest='threads', default=1, help='Number of threads to be used.')
    parser.add_argument('-v', action='store', type=int, default=3, dest='verbose', help='Verbose levels', choices=list(range(1, 5)))
    parser.add_argument('-c', action='store_true', dest='use_cm_align', help='Set if you want to use cmalign isntead of hmmalign')
    parser.add_argument('-s', action="store", default=None, dest='aligned_sequences', help='sequences that needs to be aligned')
    parser.add_argument('-a', action="store", default=None, dest='template_al', help='alignment template')
    parser.add_argument('-x', action="store", default=None, dest='taxonomy', help='taxonomy file path')
    parser.add_argument('-f', action='store_true', dest='force_rewrite', help='Set if you want to rewrite the file, even if it exists')
    parser.add_argument('-i', action="store", dest='fasta_input', default=None, help='input fasta sequences')
    parser.add_argument('-p', action="store", dest='protein_fasta_input', default=None, help='input fasta sequences, in protein format. Corresponding to the -i sequences')
    parser.add_argument('-w', action="store", dest='warning_file_check_input', default=None, help='for check_input there can be quite some warning messages. Use -w to save them to a file')
    parser.add_argument('-d', action="store", dest='database', default=None, help='file containing the database')
    parser.add_argument('-S', action="store", dest='intermediate_al', default=None, help='name of the file for the intermediate alignment')
    parser.add_argument('-C', action="store", dest='intermediate_cross_val', default=None, help='name of the file for the intermediate cross validation results')
    parser.add_argument('-m', action='store', type=int, default=None, dest='min_perc_state', help='Minimum number of mapping states, i.e. how many features of the classifier we cover')
    parser.add_argument('-l', action='store_true', dest='long_out', help='Print more columns for the classification pipeline')
    parser.add_argument('-r', action='store_true', dest='keep_all_genes', help='keep all genes when doing the classification of genomes')
    parser.add_argument('-D', action="store", dest='dir_input', default=None, help='input directory')
    # -T: basically the minimum score required
    parser.add_argument('-T', action="store", dest='file_thresholds', default=None, help='file with the thresholds for the genes in the genome classifier')
    parser.add_argument('-e', action="store", default="l1", dest='penalty_logistic', help='penalty for the logistic regression', choices=['l1', 'l2', 'none'])
    parser.add_argument('-E', action="store", default="liblinear", dest='solver_logistic', help='solver for the logistic regression', choices=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    parser.add_argument('-G', action="store", dest="marker_genes", default=None, help="Set of identified marker genes in lieu of a genomic sequence")
    parser.add_argument('--version', action='version', version='%(prog)s {0} on python {1}'.format(tool_version, sys.version.split()[0]))
    return parser


def _list_fasta_files(directory, verbose):
    """Return the regular files in *directory* whose first character is '>'.

    Files that cannot be opened or decoded are skipped; a warning is written
    to stderr for them when *verbose* is greater than 1.
    """
    fasta_files = []
    for entry in os.listdir(directory):
        path = os.path.join(directory, entry)
        if not os.path.isfile(path):
            continue
        try:
            # The context manager closes the handle right away; comparing the
            # (possibly empty) string avoids an IndexError on empty files.
            with open(path) as handle:
                first_char = handle.read(1)
        except (OSError, ValueError):  # ValueError covers UnicodeDecodeError
            if verbose > 1:
                sys.stderr.write("[W::main] Warning: Cannot open file: {}\n".format(path))
            continue
        if first_char == ">":
            fasta_files.append(path)
    return fasta_files


def main(argv=None):
    """Run the stag command line interface.

    Args:
        argv: list of command line arguments (without the program name).
            When None, ``sys.argv[1:]`` is used.

    Returns:
        The exit code of ``stag_test`` for the ``test`` command, otherwise
        None on success. Validation failures terminate the process through
        ``handle_error`` (exit status 1).
    """
    # Honour the argv parameter (it used to be ignored); None keeps the
    # default behaviour of reading sys.argv.
    args = _build_parser().parse_args(argv)

    # --------------------------------------------------------------------------
    # TEST routine
    # --------------------------------------------------------------------------
    if args.command == 'test':
        return subprocess.call(["stag_test"])

    # --------------------------------------------------------------------------
    # set defaults for the parameters
    # --------------------------------------------------------------------------
    if args.min_perc_state is None:
        args.min_perc_state = 5 if args.command == 'correct_seq' else 0

    if args.threads < 1:
        handle_error("number of threads (-t) is less than 1", None)
    if args.min_perc_state < 0 or args.min_perc_state > 100:
        handle_error("-m should be between 0 and 100. It represents the percentage\n"
                     "of internal states covered by the sequence (i.e. the number of features).", None)

    error = ""

    # --------------------------------------------------------------------------
    # ALIGN routine
    # --------------------------------------------------------------------------
    if args.command == 'align':
        # check that '-i' and '-a' have been provided
        if not args.fasta_input:
            error = "missing <seqfile> (-i)"
        elif not args.template_al:
            error = "missing <hmmfile>/<cmfile> (-a)"

        if error:
            handle_error(error, print_menu_align)

        # check that '-i' and '-a' are files
        check_file_exists(args.fasta_input, isfasta = True)
        check_file_exists(args.template_al, isfasta = False)
        # if -p is provided, then check that it is a fasta file
        if args.protein_fasta_input:
            check_file_exists(args.protein_fasta_input, isfasta = True)

        # call the function
        if args.output is None:
            for ali in align.align_generator(args.fasta_input, args.protein_fasta_input, args.template_al,
                                             args.use_cm_align, args.threads, args.verbose, False, args.min_perc_state):
                print(ali)
        else:
            align.align_file(args.fasta_input, args.protein_fasta_input, args.template_al, args.use_cm_align, args.threads,
                             args.verbose, args.output, args.min_perc_state)

    # --------------------------------------------------------------------------
    # CREATE_DB routine
    # --------------------------------------------------------------------------
    elif args.command == 'create_db':
        if not args.aligned_sequences:
            # check that '-s' has been provided (alignment produced by stag align)
            error = "missing <aligned_file> (-s)"
        elif not args.taxonomy:
            # check that '-x' has been provided (taxonomy file)
            error = "missing <taxonomy_file> (-x)"
        elif not args.template_al:
            # check that the hmm file is provided
            error = "missing <hmmfile>/<cmfile> (-a)"
        elif not args.output:
            # check that output is set
            error = "missing <output_DB> (-o)"

        if error:
            handle_error(error, print_menu_create_db)

        # check that '-s', '-x' and '-a' are files
        check_file_exists(args.aligned_sequences, isfasta = False)
        check_file_exists(args.taxonomy, isfasta = False)
        check_file_exists(args.template_al, isfasta = False)

        if not args.force_rewrite:
            check_file_doesnt_exists(args.output)

        # call the function to create the database
        create_db.create_db(args.aligned_sequences, args.taxonomy, args.verbose, args.output, args.use_cm_align,
                            args.template_al, args.intermediate_cross_val, args.protein_fasta_input,
                            args.penalty_logistic, args.solver_logistic, procs=args.threads)

    # --------------------------------------------------------------------------
    # TRAIN routine
    # --------------------------------------------------------------------------
    elif args.command == 'train':
        # check that '-i', '-a', '-x' and '-o' have been provided
        if not args.fasta_input:
            error = "missing <seqfile> (-i)"
        elif not args.template_al:
            error = "missing <hmmfile>/<cmfile> (-a)"
        elif not args.taxonomy:
            # check that '-x' has been provided (taxonomy file)
            error = "missing <taxonomy_file> (-x)"
        elif not args.output:
            # check that output is set
            error = "missing <output_DB> (-o)"

        if error:
            handle_error(error, print_menu_train)

        # check that '-i' and '-a' are files
        check_file_exists(args.fasta_input, isfasta = True)
        check_file_exists(args.template_al, isfasta = False)
        # if -p is provided, then check that it is a fasta file
        if args.protein_fasta_input:
            check_file_exists(args.protein_fasta_input, isfasta = True)
        check_file_exists(args.taxonomy, isfasta = False)

        if not args.force_rewrite:
            check_file_doesnt_exists(args.output)

        # FIRST: ALIGN ---------------------------------------------------------
        # we create a temporary file that will contain the alignments; only its
        # name is used below, so the handle is closed immediately
        with tempfile.NamedTemporaryFile(delete=False, mode="w") as al_file:
            al_path = al_file.name
        os.chmod(al_path, 0o644)
        # call the function
        align.align_file(args.fasta_input, args.protein_fasta_input, args.template_al, args.use_cm_align,
                         args.threads, args.verbose, al_path, args.min_perc_state)

        # SECOND: CREATE_DB ----------------------------------------------------
        # call the function to create the database
        create_db.create_db(al_path, args.taxonomy, args.verbose, args.output, args.use_cm_align,
                            args.template_al, args.intermediate_cross_val, args.protein_fasta_input,
                            args.penalty_logistic, args.solver_logistic, procs=args.threads)

        # what to do with intermediate alignment -------------------------------
        if not args.intermediate_al:
            # remove it
            os.remove(al_path)
        else:
            # save it
            shutil.move(al_path, args.intermediate_al)

    # --------------------------------------------------------------------------
    # CLASSIFY routine
    # --------------------------------------------------------------------------
    elif args.command == 'classify':
        # check that '-i' or '-s' has been provided
        if not args.fasta_input and not args.aligned_sequences:
            error = "missing <fasta_seqs> (-i) or <aligned_seq> (-s)"
        elif not args.database:
            # check that '-d' has been provided (database file)
            error = "missing <database> (-d)"

        if error:
            handle_error(error, print_menu_classify)

        # check that they are files
        if args.fasta_input:
            check_file_exists(args.fasta_input, isfasta = True)
        check_file_exists(args.database, isfasta = False)
        # if -p is provided, then check that it is a fasta file
        if args.protein_fasta_input:
            check_file_exists(args.protein_fasta_input, isfasta = True)
        # if -S is provided, we remove the file if it exists, since in the
        # function it appends only
        if args.intermediate_al and os.path.isfile(args.intermediate_al):
            os.remove(args.intermediate_al)

        # call the function
        classify.classify(args.database, fasta_input=args.fasta_input, protein_fasta_input=args.protein_fasta_input,
                          verbose=args.verbose, threads=args.threads, output=args.output, long_out=args.long_out,
                          current_tool_version=tool_version, aligned_sequences=args.aligned_sequences,
                          save_ali_to_file=args.intermediate_al, min_perc_state=args.min_perc_state)

    # --------------------------------------------------------------------------
    # CHECK_INPUT routine
    # --------------------------------------------------------------------------
    elif args.command == 'check_input':
        if not args.fasta_input:
            error = "missing <fasta_seqs> (-i)"
        elif not args.taxonomy:
            error = "missing <taxonomy_file> (-x)"
        elif not args.template_al:
            error = "missing <hmmfile>/<cmfile> (-a)"

        if error:
            handle_error(error, print_menu_check_input)

        check_create_db_input_files.check_input_files(args.fasta_input, args.protein_fasta_input, args.taxonomy,
                                                      args.template_al, args.use_cm_align, args.warning_file_check_input)

    # --------------------------------------------------------------------------
    # CORRECT_SEQ routine
    # --------------------------------------------------------------------------
    # check if the sequences are in correct orientation, if they are not, then
    # take reverse complement. Save to -o all the sequences in correct order
    elif args.command == 'correct_seq':
        # check that '-i' and '-a' have been provided
        if not args.fasta_input:
            error = "missing <seqfile> (-i)"
        elif not args.template_al:
            error = "missing <hmmfile>/<cmfile> (-a)"

        if error:
            handle_error(error, print_menu_correct_seq)

        # check that '-i' and '-a' are files
        check_file_exists(args.fasta_input, isfasta = True)
        check_file_exists(args.template_al, isfasta = False)

        # call the function
        correct_seq.correct_seq(args.fasta_input, args.template_al, args.use_cm_align, args.threads, args.verbose,
                                args.min_perc_state, args.output)

    # --------------------------------------------------------------------------
    # CONVERT_ALI routine
    # --------------------------------------------------------------------------
    elif args.command == 'convert_ali':
        # check that '-i' and '-o' have been provided
        if not args.fasta_input:
            error = "missing <file_in> (-i)"
        elif not args.output:
            error = "missing <file_out> (-o)"

        if error:
            handle_error(error, print_menu_convert_ali)

        # check that '-i' is a file
        check_file_exists(args.fasta_input, isfasta = False)

        # call function
        convert_ali.convert_ali(args.fasta_input, args.output, args.verbose)

    # --------------------------------------------------------------------------
    # UNZIP_db routine
    # --------------------------------------------------------------------------
    elif args.command == 'unzip_db':
        # check that '-d' and '-o' have been provided
        if not args.database:
            error = "missing <database> (-d)"
        elif not args.output:
            error = "missing <dir_out> (-o)"

        if error:
            handle_error(error, print_menu_unzip_db)

        # check that '-d' is a file
        check_file_exists(args.database, isfasta = False)

        # call function
        unzip_db.unzip_db(args.database, args.verbose, args.output)

    # --------------------------------------------------------------------------
    # TRAIN_GENOME routine
    # --------------------------------------------------------------------------
    # We want to have a database for classifying genomes. It will contain the
    # classifiers for the single genes
    # Input: single gene databases
    # Output: a database file (hdf5) that can be used by stag classify_genome
    elif args.command == 'train_genome':
        # check that parameters are set ----------------------------------------
        if not args.output:
            error = "missing <output_DB> (-o)"
        elif not args.fasta_input:
            error = "missing <list_gene_DBs> (-i)"
        elif not args.file_thresholds:
            error = "missing <gene_thresholds> (-T)"
        elif not args.intermediate_cross_val:
            error = "missing <concat_genes_DB> (-C)"

        if error:
            handle_error(error, print_menu_train_genome)

        # call the function
        train_genome.train_genome(args.output, args.fasta_input, args.file_thresholds,
                                  args.threads, args.verbose, args.intermediate_cross_val)

    # --------------------------------------------------------------------------
    # CLASSIFY_GENOME routine
    # --------------------------------------------------------------------------
    elif args.command == 'classify_genome':
        # check input
        if not args.database:
            error = "missing <database> (-d)"
        elif not any((args.fasta_input, args.dir_input, args.marker_genes)):
            error = "you need to provide at least -i, -D, or -G."
        elif sum(map(bool, (args.fasta_input, args.dir_input, args.marker_genes))) != 1:
            error = "options -i, -D, and -G are mutually exclusive"
        elif args.dir_input and not os.path.isdir(args.dir_input):
            error = "-D is not a directory."
        elif args.marker_genes and not os.path.isfile(args.marker_genes):
            error = "-G is not a valid file."
        elif not args.output:
            # check that output dir is defined
            error = "missing output directory (-o)"

        if error:
            handle_error(error, print_menu_classify_genome)

        # find files to classify
        marker_genes, list_files = [], []
        if args.fasta_input:
            check_file_exists(args.fasta_input, isfasta = True)
            list_files.append(args.fasta_input)
        elif args.marker_genes:
            marker_genes = [args.marker_genes]
        else:
            list_files = _list_fasta_files(args.dir_input, args.verbose)
            if not list_files:
                handle_error("no fasta files found in the directory.", None)
            sys.stderr.write(" Found "+str(len(list_files))+" fasta files\n")

        if os.path.isdir(args.output):
            if args.force_rewrite:
                shutil.rmtree(args.output)
            else:
                handle_error("output directory (-o {}) exists already.".format(args.output), None)

        # create output dir
        try:
            pathlib.Path(args.output).mkdir(exist_ok=True, parents=True)
        except OSError:
            # only filesystem errors are reported; a bare except would also
            # swallow KeyboardInterrupt and SystemExit
            handle_error("creating the output directory (-o {}).".format(args.output), None)

        if list_files:
            classify_genome.validate_genome_files(list_files)

        # call the function
        classify_genome.classify_genome(args.database, genome_files=list_files, marker_genes=marker_genes,
                                        verbose=args.verbose, threads=args.threads,
                                        output=args.output, long_out=args.long_out, keep_all_genes=args.keep_all_genes)

    return None  # success


#-------------------------------- run main -------------------------------------
if __name__ == '__main__':
    # NOTE(review): the body of this guard is elided ("...") in the visible
    # source; restore the original call (presumably main()) -- TODO confirm.
    ...

Full Screen

Full Screen

stag_test.py

Source:stag_test.py Github

copy

Full Screen

#!/usr/bin/env python
# ============================================================================ #
# stag_test Run stag tests
#
# Author: Alessio Milanese (milanese@embl.de)
#
# ============================================================================ #

import time
import os
import sys
import tempfile
import subprocess
import shlex
import errno
import importlib
import pkg_resources
import urllib.request
import hashlib
from pathlib import Path
import shutil

from .helpers import bco, is_tool, is_tool_and_return0

TEST_DATA_PATH = pkg_resources.resource_filename("stag", "test")

# horizontal rule printed above and below every banner title
_RULE = " ------------------------------------------------------------------------------"


# ------------------------------------------------------------------------------
# reporting helpers (everything is written to stderr)
# ------------------------------------------------------------------------------
def _status(color, text, tail=""):
    """Write *text* in bold *color*, reset the terminal, then *tail* and a newline."""
    sys.stderr.write(f"{color}{bco.Bold}{text}{bco.ResetAll}{tail}\n")


def _banner(title):
    """Write *title* framed by two horizontal rules."""
    _status(bco.Blue, _RULE)
    sys.stderr.write(f"{bco.Blue}{bco.Bold}|{bco.Green}{title}{bco.Blue}|{bco.ResetAll}\n")
    _status(bco.Blue, _RULE)


def _step(label):
    """Write a step label and flush, so it shows up before any child process output."""
    sys.stderr.write(label)
    sys.stderr.flush()


# ------------------------------------------------------------------------------
# file helpers
# ------------------------------------------------------------------------------
# check md5
def md5(fname):
    """Return the hexadecimal MD5 digest of the file *fname*.

    The file is read in 4096-byte chunks, so it is never loaded in memory as a
    whole. Raises OSError if the file cannot be opened.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


# download a file if it's not there
def download_file(url, filename):
    """Download *url* to *filename* and report the outcome on stderr.

    Returns True when the download succeeded and False otherwise (the original
    returned nothing, so existing callers are unaffected).
    """
    try:
        urllib.request.urlretrieve(url, filename)
    except Exception:
        # best effort: any failure (network, HTTP status, disk) is reported to
        # the user and signalled through the return value; a bare `except`
        # would also have swallowed KeyboardInterrupt/SystemExit
        _status(bco.Yellow, " error")
        return False
    _status(bco.Green, " correct")
    return True


def _md5_matches(filename, md5_db):
    """Report on stderr whether *filename* has the MD5 digest *md5_db*.

    A missing file (for instance after a failed download) is reported as
    "different" instead of raising FileNotFoundError.
    """
    sys.stderr.write(" ■ check md5: ")
    if os.path.isfile(filename) and md5(filename) == md5_db:
        _status(bco.Green, " correct")
        return True
    _status(bco.Yellow, " different")
    return False


def _remove_quietly(path):
    """Delete *path*; a file that does not exist is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # deliberate: cleanup must not hide the real failure that prevented
        # the file from being created
        pass


def _read_table(path, skip_header=False):
    """Return a dict mapping the first to the second tab-separated column of *path*."""
    table = dict()
    with open(path, "r") as handle:
        if skip_header:
            handle.readline()
        for line in handle:
            vals = line.rstrip().split("\t")
            table[vals[0]] = vals[1]
    return table


# function to download and decompress a tar.gz
def download_and_checkmd5_and_decompress(url, filename, md5_db, destination):
    """Make sure the archive *filename* is present and valid, then unpack it.

    Parameters:
        url:         where to download the archive from if it is missing or
                     its checksum is wrong
        filename:    local path of the ".tar.gz" archive
        md5_db:      expected MD5 hex digest of the archive
        destination: directory passed to `tar -C`

    Returns the path of the resulting directory, which should have the same
    name as the archive minus ".tar.gz", with a trailing "/".
    Every step is reported on stderr; failures are reported, not raised.
    """
    extracted_dir = filename[0:-7]  # strip the 7 characters of ".tar.gz"
    # we remove the directory if it exists already
    shutil.rmtree(extracted_dir, ignore_errors=True)

    # check if the file is already downloaded
    if Path(filename).is_file():
        sys.stderr.write(" ■ already downloaded\n")
    else:
        sys.stderr.write(" ■ download file: ")
        download_file(url, filename)

    # check md5, and download once more if the local copy is wrong or missing
    if not _md5_matches(filename, md5_db):
        sys.stderr.write(" ■ Re-download file: ")
        download_file(url, filename)
        _md5_matches(filename, md5_db)

    # decompress
    sys.stderr.write(" ■ Unzip file: ")
    # argument list instead of a split string: paths may contain whitespace
    extract_cmd = ["tar", "-zxvf", filename, "-C", destination]
    try:
        process = subprocess.run(extract_cmd, stdout=subprocess.DEVNULL,
                                 stderr=subprocess.DEVNULL)
        extracted = process.returncode == 0
    except OSError:
        # tar could not be started at all (e.g. it is not installed)
        extracted = False
    if extracted:
        _status(bco.Green, " correct")
    else:
        _status(bco.Yellow, " error")

    return extracted_dir + "/"


# ------------------------------------------------------------------------------
# check helpers
# ------------------------------------------------------------------------------
def _check_tool(label, tool, warning):
    """Report whether the executable *tool* is available; return True if it is."""
    sys.stderr.write(label)
    if is_tool(tool):
        _status(bco.Green, " correct")
        return True
    _status(bco.Yellow, warning, "\n")
    return False


def _check_library(label, module_name, warning):
    """Report whether *module_name* can be imported; return True if it can."""
    sys.stderr.write(label)
    try:
        importlib.import_module(module_name)
    except ImportError:
        _status(bco.Yellow, warning, "\n")
        return False
    _status(bco.Green, " correct")
    return True


def _run_stag(arguments):
    """Run `stag` with the list *arguments*, report outcome and runtime.

    Exits the program with status 1 when stag returns a non-zero exit code.
    """
    t0 = time.time()
    process = subprocess.run(["stag"] + arguments)
    runtime = time.time() - t0
    if process.returncode:
        _status(bco.Red, " Error", f" ({runtime:.3f}s)")
        sys.exit(1)
    _status(bco.Green, " correct", f" ({runtime:.3f}s)")


def _check_environment():
    """Check python, the external tools and the python libraries.

    Returns True when at least one requirement is missing.
    """
    _status(bco.Cyan, "1-- Tools and versions:")
    checks = []

    # check python version -----------------------------------------------------
    sys.stderr.write(" ■ python: ")
    python_ok = sys.version_info >= (3, 0, 0)
    if python_ok:
        _status(bco.Green, " correct")
    else:
        _status(bco.Yellow, " WARNING: python2 is not supported", "\n")
    checks.append(python_ok)

    # external tools: hmmer, Easel, seqtk ----------------------------------------
    checks.append(_check_tool(" ■ hmmalign: ", "hmmalign",
                              " WARNING. HMMER3 is not in the path"))
    checks.append(_check_tool(" ■ esl-reformat: ", "esl-reformat",
                              " WARNING. EASEL is not in the path"))
    checks.append(_check_tool(" ■ seqtk: ", "seqtk",
                              " WARNING. seqtk is not in the path"))

    # Python libraries -----------------------------------------------------------
    for library in ("numpy", "pandas", "sklearn", "h5py"):
        checks.append(_check_library(" ■ (L)" + library + ": ", library,
                                     " WARNING. " + library + " is missing"))

    return not all(checks)


def _quick_test():
    """Train and classify the bundled test gene, then verify the taxonomy.

    Returns True when at least one predicted taxonomy is wrong. Exits with
    status 1 when stag fails or the result file is malformed. The temporary
    files are removed in every case.
    """
    seq_file = os.path.join(TEST_DATA_PATH, "sequences.fasta")
    tax_file = os.path.join(TEST_DATA_PATH, "sequences.taxonomy")
    hmm_file = os.path.join(TEST_DATA_PATH, "gene.hmm")

    # we only need the names: close the handles right away so that stag is the
    # only writer of these files
    with tempfile.NamedTemporaryFile(delete=False, mode="w") as handle:
        db_path = handle.name
    with tempfile.NamedTemporaryFile(delete=False, mode="w") as handle:
        res_path = handle.name

    try:
        # TRY TO RUN STAG ======================================================
        _status(bco.Cyan, "2-- Run stag:")
        _step(" ■ train: ")
        _run_stag(["train", "-f", "-o", db_path, "-i", seq_file,
                   "-x", tax_file, "-a", hmm_file])
        _step(" ■ classify: ")
        _run_stag(["classify", "-v", "1", "-d", db_path, "-i", seq_file,
                   "-o", res_path])

        # CHECK THE RESULTING FILE =============================================
        _status(bco.Cyan, "3-- Check result of the classification:")
        _step(" ■ taxonomy of classified sequences: ")
        correct_tax = _read_table(tax_file)
        pred_tax = dict()
        with open(res_path, "r") as handle:
            handle.readline()  # remove header
            for line in handle:
                vals = line.rstrip().split("\t")
                if len(vals) < 2:
                    _status(bco.Red, " Error: less than two values (" + str(vals) + ")")
                    sys.exit(1)
                pred_tax[vals[0]] = vals[1]
    finally:
        # remove temp files, also when we leave through sys.exit()
        _remove_quietly(db_path + ".log")
        _remove_quietly(db_path)
        _remove_quietly(res_path)

    # let's check the values
    if set(pred_tax) != set(correct_tax):
        _status(bco.Red, " Error: different number of predicted genes")
        # diagnostics go to stderr like every other message of this script
        print(len(pred_tax), len(correct_tax), file=sys.stderr)
        print(*pred_tax.keys(), sep="\n", file=sys.stderr)
        print("****", file=sys.stderr)
        print(*correct_tax.keys(), sep="\n", file=sys.stderr)
        sys.exit(1)

    # if we arrive here, we have the same set of predicted genes
    # let's check the predicted taxonomies
    error_flag = False
    for gene, predicted in pred_tax.items():
        if predicted != correct_tax[gene]:
            error_flag = True
            _status(bco.Red, " Error: different taxonomy for " + gene
                    + "(correct:'" + correct_tax[gene]
                    + "', predicted:'" + predicted + "')")
    if not error_flag:
        _status(bco.Green, " correct")
    return error_flag


def _long_test():
    """Download the real test data and test genome DB training and classification."""
    # long test part 1: test building genome DB --------------------------------
    _status(bco.Cyan, "1-- Build genome database:")
    # prepare data
    link_db = "https://zenodo.org/record/4626959/files/train_genome_files.tar.gz"
    db_name = os.path.join(TEST_DATA_PATH, "train_genome_files.tar.gz")
    md5_db = "5ec5a527d25cc6d1f11a8ec50cd252a7"
    this_dir = download_and_checkmd5_and_decompress(link_db, db_name, md5_db, TEST_DATA_PATH)
    # train genome
    _step(" ■ train genome DB: ")
    gene_files = this_dir + "COG0012," + this_dir + "COG0016," + this_dir + "COG0018"
    merged_db = this_dir + "genes_ali.stagDB"
    thresholds = this_dir + "gene_thresholds"
    result_genome_DB = this_dir + "TEST_DB.stagDB"
    _run_stag(["train_genome", "-v", "1", "-o", result_genome_DB, "-i", gene_files,
               "-T", thresholds, "-C", merged_db])

    # long test part 2: test classify genome -----------------------------------
    _status(bco.Cyan, "2-- Test genome classification:")
    # prepare data
    link_db = "https://zenodo.org/record/4626959/files/classify_genome_files.tar.gz"
    db_name = os.path.join(TEST_DATA_PATH, "classify_genome_files.tar.gz")
    md5_db = "819cc77d463a797a330d8d1d9437feca"
    this_dir = download_and_checkmd5_and_decompress(link_db, db_name, md5_db, TEST_DATA_PATH)
    # classify genomes
    _step(" ■ classify 2 genomes: ")
    genome_files = this_dir + "genomes"
    result = this_dir + "RESULT_TEMP"
    _run_stag(["classify_genome", "-v", "1", "-o", result, "-d", result_genome_DB,
               "-D", genome_files])
    # check result of the classification ---------------------------------------
    sys.stderr.write(" ■ check result: ")
    # correct annotation
    correct_classification = _read_table(this_dir + "CORRECT_ASSIGNMENT/genome_annotation",
                                         skip_header=True)
    # genome -> "was it found in our result?"
    all_genomes = dict.fromkeys(correct_classification, False)
    # this annotation
    annotation_file = this_dir + "RESULT_TEMP/genome_annotation"
    with open(annotation_file) as handle:
        handle.readline()
        for line in handle:
            vals = line.rstrip().split("\t")
            vals[0] = vals[0].split("/")[-1]  # keep only the file name
            all_genomes[vals[0]] = True
            if vals[0] not in correct_classification:
                _status(bco.Red, " Error, too many lines")
                _status(bco.Red, " Check " + annotation_file + " ")
                sys.exit(1)
            if correct_classification[vals[0]] != vals[1]:
                _status(bco.Red, " Error, wrong calssification")
                _status(bco.Red, " Check " + annotation_file + " ")
                sys.exit(1)
    # check that all the genomes were profiled
    if not all(all_genomes.values()):
        _status(bco.Red, " Error, some genomes are missing")
        _status(bco.Red, " Check " + annotation_file + " ")
        sys.exit(1)
    # if we arrive till here, then it's correct
    _status(bco.Green, " correct")

    # long test part 3: test gene train and classification with real data ------
    _status(bco.Cyan, "3-- Test real genes:")
    # prepare data
    link_db = "https://zenodo.org/record/4626959/files/test_gene.tar.gz"
    db_name = os.path.join(TEST_DATA_PATH, "test_gene.tar.gz")
    md5_db = "bee91d9dc06fae153502af386c29ca5c"
    this_dir = download_and_checkmd5_and_decompress(link_db, db_name, md5_db, TEST_DATA_PATH)
    # train database
    _step(" ■ train database: ")
    seq_file = this_dir + "train.fna"
    protein_file = this_dir + "train.faa"
    tax_file = this_dir + "train.tax"
    hmm_file = this_dir + "COG0012.hmm"
    trained_db = this_dir + "TRAINED_TEMP"
    _run_stag(["train", "-f", "-o", trained_db, "-i", seq_file, "-p", protein_file,
               "-x", tax_file, "-a", hmm_file, "-t", "2"])
    _step(" ■ classify: ")
    res_classification = this_dir + "RES_TEMP"
    seq_file = this_dir + "test.fna"
    protein_file = this_dir + "test.faa"
    _run_stag(["classify", "-v", "1", "-d", trained_db, "-i", seq_file,
               "-p", protein_file, "-o", res_classification])
    # check result of the classification ---------------------------------------
    sys.stderr.write(" ■ check result: ")
    # correct annotation
    correct_classification = _read_table(this_dir + "test.CORRECT_ASSIGNMENT",
                                         skip_header=True)
    all_genomes = dict.fromkeys(correct_classification, False)
    # this annotation
    with open(res_classification) as handle:
        handle.readline()
        for line in handle:
            vals = line.rstrip().split("\t")
            all_genomes[vals[0]] = True
            if vals[0] not in correct_classification:
                _status(bco.Red, " Error, too many lines")
                _status(bco.Red, " Check " + res_classification)
                sys.exit(1)
            # a wrong prediction is only highlighted here, it is not fatal
            expected = correct_classification[vals[0]]
            color = bco.LightGreen if expected == vals[1] else bco.Yellow
            sys.stderr.write(f"\n{color} Corr: " + expected + f"{bco.ResetAll}\n")
            sys.stderr.write(f"{color} Pred: " + vals[1] + f"{bco.ResetAll}\n")
    # check that all the genomes were profiled
    if not all(all_genomes.values()):
        _status(bco.Red, " Error, some genomes are missing")
        _status(bco.Red, " Check " + res_classification)
        sys.exit(1)
    # if we arrive till here, then it's correct
    _status(bco.Green, " correct")


# ------------------------------------------------------------------------------
# MAIN
# ------------------------------------------------------------------------------
def main(argv=None):
    """Run the stag self test: environment check, quick test, then long test.

    *argv* is accepted for interface compatibility and is not used.
    Returns None on success; every failure ends the program with exit
    status 1. All progress messages are written to stderr.
    """
    _banner(" TEST STAG ")
    # a missing tool does not stop us immediately: we still try to run stag, so
    # that the user gets the complete picture in a single run
    error_found = _check_environment()
    error_flag = _quick_test()
    if error_found or error_flag:
        sys.exit(1)

    ############################################################################
    _banner(" LONG STAG TEST ")
    _long_test()
    return None  # success


#-------------------------------- run main -------------------------------------
if __name__ == '__main__':...

Full Screen

Full Screen

Color.py

Source:Color.py Github

copy

Full Screen

1class Color_Manager:2 class BG:3 Black = '\033[40m'4 Red = '\033[41m'5 Green = '\033[42m'6 Yellow = '\033[43m'7 Blue = '\033[44m'8 Magenta = '\033[45m'9 Cyan = '\033[46m'10 White = '\033[47m'11 Reset = '\033[49m'12 class FG:13 Black = '\033[30m'14 Red = '\033[31m'15 Green = '\033[32m'16 Yellow = '\033[33m'17 Blue = '\033[34m'18 Magenta = '\033[35m'19 Cyan = '\033[36m'20 White = '\033[37m'21 Reset = '\033[39m'22 class BRIGHT:23 dim = '\033[2m'24 normal = '\033[22m'25 class EFFECT:26 ResetAll = '\033[0m'27 Italic = '\033[3m'28 Underline = '\033[4m'29 Bold = '\033[1m'30 @classmethod31 def FgPrint(cls, str, color):32 if color == 'Black':33 print(cls.FG.Black + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')34 elif color == 'Red':35 print(cls.FG.Red + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')36 elif color == 'Green':37 print(cls.FG.Green + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')38 elif color == 'Yellow':39 print(cls.FG.Yellow + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')40 elif color == 'Blue':41 print(cls.FG.Blue + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')42 elif color == 'Magenta':43 print(cls.FG.Magenta + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')44 elif color == 'Cyan':45 print(cls.FG.Cyan + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')46 elif color == 'White':47 print(cls.FG.White + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')48 @classmethod49 def BgPrint(cls, str, color):50 if color == 'Black':51 print(cls.BG.Black + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')52 elif color == 'Red':53 print(cls.BG.Red + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')54 elif color == 'Green':55 print(cls.BG.Green + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')56 elif color == 'Yellow':57 print(cls.BG.Yellow + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')58 elif color == 'Blue':59 print(cls.BG.Blue + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, 
end = ' ')60 elif color == 'Magenta':61 print(cls.BG.Magenta + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')62 elif color == 'Cyan':63 print(cls.BG.Cyan + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')64 elif color == 'White':...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pytest-mock automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful