Unlock 30% off on Manual Testing Annual Plans this Holiday Season.

Copied to Clipboard!

How to use the resetall method in pytest-mock

Best Python code snippet using pytest-mock

__main__.py

Source: __main__.py

1#!/usr/bin/env python2import os3import sys4import argparse5import shlex6import shutil7import time8import subprocess9import glob10import tempfile11import errno12import tarfile13import json14import pathlib15from . import __version__ as tool_version16from .helpers import bco, print_error, check_file_exists, check_file_doesnt_exists17import stag.align as align18import stag.create_db as create_db19import stag.classify as classify20import stag.check_create_db_input_files as check_create_db_input_files21import stag.correct_seq as correct_seq22import stag.unzip_db as unzip_db23import stag.classify_genome as classify_genome24import stag.train_genome as train_genome25import stag.convert_ali as convert_ali26def handle_error(error, help_f=None):27    if help_f:28        help_f()29    print_error()30    print(error, file=sys.stderr)31    sys.exit(1)32# ------------------------------------------------------------------------------33#       print the help informations34# ------------------------------------------------------------------------------35class CapitalisedHelpFormatter(argparse.HelpFormatter):36    def add_usage(self, usage, actions, groups, prefix=None):37        if prefix is None:38            prefix = ''39        return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix)40def msg(name=None):41    str_msg = f'''42\0043{bco.Cyan}Program:{bco.ResetAll} stag - Supervised Taxonomic Assignment of marker Genes44{bco.Cyan}Version:{bco.ResetAll} '''+tool_version+f'''45{bco.Cyan}Usage:{bco.ResetAll} stag <command> [options]46{bco.Cyan}Command:{bco.ResetAll}47 {bco.LightGreen}-- Single gene{bco.ResetAll}48      {bco.LightBlue}train{bco.ResetAll}        Train a classifier and create a database49      {bco.LightBlue}classify{bco.ResetAll}     Taxonomically annotate a gene50      {bco.LightBlue}align{bco.ResetAll}        Align a sequence to a hmm or infernal model51      {bco.LightBlue}create_db{bco.ResetAll}    Create a database given the aligned 
sequences52      {bco.LightBlue}check_input{bco.ResetAll}  Check the input for the train command53      {bco.LightBlue}correct_seq{bco.ResetAll}  Correct sequences that are in wrong orientation54      {bco.LightBlue}convert_ali{bco.ResetAll}  Convert between 1-hot-encoding and fasta, and vice versa55      {bco.LightBlue}unzip_db{bco.ResetAll}     Create a directory with the content of a database56 {bco.LightGreen}-- Genome{bco.ResetAll}57      {bco.LightBlue}train_genome{bco.ResetAll}     Merge classifiers of single genes58      {bco.LightBlue}classify_genome{bco.ResetAll}  Taxonomically annotate a genome (predict genes, extract59                       the database marker genes and classify them)60Type stag <command> to print the help for a specific command61        '''62    return str_msg63# ------------------------------------------------------------------------------64def print_menu_align():65    sys.stderr.write("\n")66    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} align {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-a{bco.ResetAll} <hmmfile> [options]\n\n")67    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE  sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")68    sys.stderr.write(f"  {bco.LightBlue}-p{bco.ResetAll}  FILE  protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")69    sys.stderr.write(f"  {bco.LightBlue}-a{bco.ResetAll}  FILE  hmmfile or cmfile to use as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")70    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE  output file name {bco.LightMagenta}[stdout]{bco.ResetAll}\n")71    sys.stderr.write(f"  {bco.LightBlue}-c{bco.ResetAll}        set if you are using a cmfile\n")72    sys.stderr.write(f"  {bco.LightBlue}-m{bco.ResetAll}  INT   threshold for the number of features per sequence (percentage) {bco.LightMagenta}[0]{bco.ResetAll}\n")73    
sys.stderr.write(f"  {bco.LightBlue}-t{bco.ResetAll}  INT   number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")74    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")75    sys.stderr.write(f"{bco.Cyan}Note:{bco.ResetAll} if -p is provided, then the alignment will be done at the level\nof the proteins and then converted to gene alignment (from -i input).\nThe order of the sequences in -i and -p should be the same.\n\n")76# ------------------------------------------------------------------------------77def print_menu_create_db():78    sys.stderr.write("\n")79    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} create_db {bco.LightBlue}-s{bco.ResetAll} <aligned_file> {bco.LightBlue}-x{bco.ResetAll} <taxonomy_file>\n")80    sys.stderr.write(f"                     {bco.LightBlue}-a{bco.ResetAll} <hmmfile> {bco.LightBlue}-o{bco.ResetAll} <output_DB> [options]\n\n")81    sys.stderr.write(f"  {bco.LightBlue}-s{bco.ResetAll}  FILE  file with 1-hot encoding MSA (result from stag align) {bco.LightMagenta}[required]{bco.ResetAll}\n")82    sys.stderr.write(f"  {bco.LightBlue}-a{bco.ResetAll}  FILE  hmmfile or cmfile to used as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")83    sys.stderr.write(f"  {bco.LightBlue}-c{bco.ResetAll}        set if you are using a cmfile\n")84    sys.stderr.write(f"  {bco.LightBlue}-x{bco.ResetAll}  FILE  taxonomy file (tab separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")85    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE  output file name (HDF5 format) {bco.LightMagenta}[required]{bco.ResetAll}\n")86    sys.stderr.write(f"  {bco.LightBlue}-f{bco.ResetAll}        force to rewrite output file\n")87    sys.stderr.write(f"  {bco.LightBlue}-C{bco.ResetAll}  FILE  save intermediate cross validation results {bco.LightMagenta}[None]{bco.ResetAll}\n")88    
sys.stderr.write(f"  {bco.LightBlue}-p{bco.ResetAll}  FILE  protein sequences, if they were used for the alignment {bco.LightMagenta}[None]{bco.ResetAll}\n")89    sys.stderr.write(f"  {bco.LightBlue}-e{bco.ResetAll}  STR   penalty for the logistic regression {bco.LightMagenta}[\"l1\"]{bco.ResetAll}\n")90    sys.stderr.write(f"  {bco.LightBlue}-E{bco.ResetAll}  STR   solver for the logistic regression {bco.LightMagenta}[\"liblinear\"]{bco.ResetAll}\n")91    sys.stderr.write(f"  {bco.LightBlue}-t{bco.ResetAll}  INT   number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")92    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")93# ------------------------------------------------------------------------------94def print_menu_classify():95    sys.stderr.write("\n")96    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} classify {bco.LightBlue}-d{bco.ResetAll} <database> [{bco.LightBlue}-i{bco.ResetAll}/{bco.LightBlue}-s{bco.ResetAll}] <seq_file> [options]\n\n")97    sys.stderr.write(f"  {bco.LightBlue}-d{bco.ResetAll}  FILE  database created with create_db or train {bco.LightMagenta}[required]{bco.ResetAll}\n")98    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE  sequences to taxonomically annotate (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")99    sys.stderr.write(f"  {bco.LightBlue}-s{bco.ResetAll}  FILE  aligned sequences, can be provided instead of -i {bco.ResetAll}\n")100    sys.stderr.write(f"  {bco.LightBlue}-p{bco.ResetAll}  FILE  protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")101    sys.stderr.write(f"  {bco.LightBlue}-S{bco.ResetAll}  FILE  save intermediate alignment file {bco.LightMagenta}[None]{bco.ResetAll}\n")102    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE  output file name {bco.LightMagenta}[stdout]{bco.ResetAll}\n")103    
sys.stderr.write(f"  {bco.LightBlue}-l{bco.ResetAll}        long output (with more information about the classification)\n")104    sys.stderr.write(f"  {bco.LightBlue}-m{bco.ResetAll}  INT   threshold for the number of features per sequence (percentage) {bco.LightMagenta}[0]{bco.ResetAll}\n")105    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")106# ------------------------------------------------------------------------------107def print_menu_check_input():108    sys.stderr.write("\n")109    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} check_input {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-x{bco.ResetAll} <taxonomy_file>\n")110    sys.stderr.write(f"                       {bco.LightBlue}-a{bco.ResetAll} <hmmfile> [options]\n\n")111    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE  sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")112    sys.stderr.write(f"  {bco.LightBlue}-p{bco.ResetAll}  FILE  protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")113    sys.stderr.write(f"  {bco.LightBlue}-a{bco.ResetAll}  FILE  hmmfile or cmfile to used as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")114    sys.stderr.write(f"  {bco.LightBlue}-c{bco.ResetAll}        set if you are using a cmfile\n")115    sys.stderr.write(f"  {bco.LightBlue}-x{bco.ResetAll}  FILE  taxonomy file (tab separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")116    sys.stderr.write(f"  {bco.LightBlue}-w{bco.ResetAll}  FILE  save warning messages to a file {bco.LightMagenta}[None]{bco.ResetAll}\n")117    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")118# 
------------------------------------------------------------------------------119def print_menu_train():120    sys.stderr.write("\n")121    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} train {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-x{bco.ResetAll} <taxonomy_file>\n")122    sys.stderr.write(f"                 {bco.LightBlue}-a{bco.ResetAll} <hmmfile> {bco.LightBlue}-o{bco.ResetAll} <output_DB> [options]\n\n")123    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE  sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")124    sys.stderr.write(f"  {bco.LightBlue}-p{bco.ResetAll}  FILE  protein sequences, corresponding to -i {bco.LightMagenta}[None]{bco.ResetAll}\n")125    sys.stderr.write(f"  {bco.LightBlue}-a{bco.ResetAll}  FILE  hmmfile or cmfile to used as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")126    sys.stderr.write(f"  {bco.LightBlue}-c{bco.ResetAll}        set if you are using a cmfile\n")127    sys.stderr.write(f"  {bco.LightBlue}-x{bco.ResetAll}  FILE  taxonomy file (tab separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")128    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE  output file name (HDF5 format) {bco.LightMagenta}[required]{bco.ResetAll}\n")129    sys.stderr.write(f"  {bco.LightBlue}-f{bco.ResetAll}        force to rewrite output file\n\n")130    sys.stderr.write(f"  {bco.LightBlue}-S{bco.ResetAll}  FILE  save intermediate alignment file {bco.LightMagenta}[None]{bco.ResetAll}\n")131    sys.stderr.write(f"  {bco.LightBlue}-C{bco.ResetAll}  FILE  save intermediate cross validation results {bco.LightMagenta}[None]{bco.ResetAll}\n")132    sys.stderr.write(f"  {bco.LightBlue}-t{bco.ResetAll}  INT   number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")133    sys.stderr.write(f"  {bco.LightBlue}-m{bco.ResetAll}  INT   threshold for the number of features per sequence (percentage) 
{bco.LightMagenta}[0]{bco.ResetAll}\n")134    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")135    sys.stderr.write(f"  {bco.LightBlue}-e{bco.ResetAll}  STR   penalty for the logistic regression {bco.LightMagenta}[\"l1\"]{bco.ResetAll}\n")136    sys.stderr.write(f"  {bco.LightBlue}-E{bco.ResetAll}  STR   solver for the logistic regression {bco.LightMagenta}[\"liblinear\"]{bco.ResetAll}\n\n")137    sys.stderr.write(f"{bco.Cyan}Note:{bco.ResetAll} if -p is provided, then the alignment will be done at the level\nof the proteins and then converted to gene alignment (from -i input).\nThe order of the sequences in -i and -p should be the same.\n\n")138# ------------------------------------------------------------------------------139def print_menu_correct_seq():140    sys.stderr.write("\n")141    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} correct_seq {bco.LightBlue}-i{bco.ResetAll} <fasta_seqs> {bco.LightBlue}-a{bco.ResetAll} <hmmfile> [options]\n\n")142    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE  sequences to be aligned (fasta format) {bco.LightMagenta}[required]{bco.ResetAll}\n")143    sys.stderr.write(f"  {bco.LightBlue}-a{bco.ResetAll}  FILE  hmmfile or cmfile to use as template for the alignment {bco.LightMagenta}[required]{bco.ResetAll}\n")144    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE  output file name {bco.LightMagenta}[stdout]{bco.ResetAll}\n")145    sys.stderr.write(f"  {bco.LightBlue}-c{bco.ResetAll}        set if you are using a cmfile\n")146    sys.stderr.write(f"  {bco.LightBlue}-m{bco.ResetAll}  INT   threshold for the number of features per sequence (percentage) {bco.LightMagenta}[5]{bco.ResetAll}\n")147    sys.stderr.write(f"  {bco.LightBlue}-t{bco.ResetAll}  INT   number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")148    sys.stderr.write(f"  
{bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")149# ------------------------------------------------------------------------------150def print_menu_train_genome():151    sys.stderr.write("\n")152    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} train_genome {bco.LightBlue}-i{bco.ResetAll} <list_gene_DBs> {bco.LightBlue}-T{bco.ResetAll} <gene_thresholds>\n                         {bco.LightBlue}-o{bco.ResetAll} <output_DB> {bco.LightBlue}-C{bco.ResetAll} <concat_genes_DB> [options]\n\n")153    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  LIST  list of single gene databases to use (comma separated) {bco.LightMagenta}[required]{bco.ResetAll}\n")154    sys.stderr.write(f"  {bco.LightBlue}-T{bco.ResetAll}  FILE  hmm treshold for selecting the genes {bco.LightMagenta}[required]{bco.ResetAll}\n")155    sys.stderr.write(f"  {bco.LightBlue}-C{bco.ResetAll}  FILE  stag database for the concatenated genes{bco.LightMagenta}[required]{bco.ResetAll}\n")156    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE  output file name (HDF5 format) {bco.LightMagenta}[required]{bco.ResetAll}\n")157    sys.stderr.write(f"  {bco.LightBlue}-t{bco.ResetAll}  INT   number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")158    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")159# ------------------------------------------------------------------------------160def print_menu_classify_genome():161    sys.stderr.write("\n")162    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} classify_genome {bco.LightBlue}-d{bco.ResetAll} <genome_database> {bco.LightBlue}-o{bco.ResetAll} res_dir\n")163    sys.stderr.write(f"                            [{bco.LightBlue}-i{bco.ResetAll} <fasta_seq>/{bco.LightBlue}-D{bco.ResetAll} 
<directory>/{bco.LightBlue}-G{bco.ResetAll} <markers.json>] [options]\n\n")164    sys.stderr.write(f"  {bco.LightBlue}-d{bco.ResetAll}  FILE   database created with train_genome {bco.LightMagenta}[required]{bco.ResetAll}\n")165    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE   genome fasta file\n")166    sys.stderr.write(f"  {bco.LightBlue}-D{bco.ResetAll}  DIR    directory containing genome fasta files (only fasta\n             files will be used)\n")167    sys.stderr.write(f"  {bco.LightBlue}-G{bco.ResetAll}  FILE   json file pointing at a marker gene set (in lieu of a full genome)\n")168    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  DIR    output directory {bco.LightMagenta}[required]{bco.ResetAll}\n")169    sys.stderr.write(f"  {bco.LightBlue}-l{bco.ResetAll}         long output (with more information about the classification) {bco.LightMagenta}[False]\n")170    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT    verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n")171    sys.stderr.write(f"  {bco.LightBlue}-r{bco.ResetAll}         use all genes above the filter {bco.LightMagenta}[False]{bco.ResetAll}\n\n")172    sys.stderr.write(f"  {bco.LightBlue}-t{bco.ResetAll}  INT   number of threads {bco.LightMagenta}[1]{bco.ResetAll}\n")173# ------------------------------------------------------------------------------174def print_menu_convert_ali():175    sys.stderr.write("\n")176    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} convert_ali {bco.LightBlue}-i{bco.ResetAll} <file_in> {bco.LightBlue}-o{bco.ResetAll} <file_out> [options]\n\n")177    sys.stderr.write(f"  {bco.LightBlue}-i{bco.ResetAll}  FILE   Input file, either a 1-hot-encoding created by stag align,\n")178    sys.stderr.write(f"             or a fasta file of aligned sequences created by hmmalign.\n")179    sys.stderr.write(f"             The input type is detected automatically 
{bco.LightMagenta}[required]{bco.ResetAll}\n")180    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  FILE   A 1-hot-encoding if the input was fasta,\n")181    sys.stderr.write(f"             or a fasta file if the input was 1-hot-encoding {bco.LightMagenta}[required]{bco.ResetAll}\n")182    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT    verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")183# ------------------------------------------------------------------------------184def print_menu_unzip_db():185    sys.stderr.write("\n")186    sys.stderr.write(f"{bco.Cyan}Usage:{bco.ResetAll} {bco.Green}stag{bco.ResetAll} unzip_db {bco.LightBlue}-d{bco.ResetAll} <database> {bco.LightBlue}-o{bco.ResetAll} <dir_out> [options]\n\n")187    sys.stderr.write(f"  {bco.LightBlue}-d{bco.ResetAll}  FILE  database created with create_db or train {bco.LightMagenta}[required]{bco.ResetAll}\n")188    sys.stderr.write(f"  {bco.LightBlue}-o{bco.ResetAll}  DIR   create a dir with the unzipped database {bco.LightMagenta}[required]{bco.ResetAll}\n")189    sys.stderr.write(f"  {bco.LightBlue}-v{bco.ResetAll}  INT   verbose level: 1=error, 2=warning, 3=message, 4+=debugging {bco.LightMagenta}[3]{bco.ResetAll}\n\n")190# ------------------------------------------------------------------------------191# MAIN192# ------------------------------------------------------------------------------193def main(argv=None):194    parser = argparse.ArgumentParser(usage=msg(), formatter_class=CapitalisedHelpFormatter,add_help=False)195    parser.add_argument('command', action="store", default=None, help='mode to run stag',196                        choices=['align','train','classify','create_db','check_input','correct_seq','train_genome',197                                 'classify_genome','test','convert_ali',"unzip_db"])198    parser.add_argument('-o', action="store", dest='output', default=None, help='name of output file')199    
parser.add_argument('-t', type=int, action="store", dest='threads', default=1, help='Number of threads to be used.')200    parser.add_argument('-v', action='store', type=int, default=3, dest='verbose', help='Verbose levels', choices=list(range(1,5)))201    parser.add_argument('-c', action='store_true', dest='use_cm_align', help='Set if you want to use cmalign isntead of hmmalign')202    parser.add_argument('-s', action="store", default=None,dest='aligned_sequences', help='sequences that needs to be aligned')203    parser.add_argument('-a', action="store", default=None,dest='template_al', help='alignment template')204    parser.add_argument('-x', action="store", default=None,dest='taxonomy', help='taxonomy file path')205    parser.add_argument('-f', action='store_true', dest='force_rewrite', help='Set if you want to rewrite the file, even if it exists')206    parser.add_argument('-i', action="store", dest='fasta_input', default=None, help='input fasta sequences')207    parser.add_argument('-p', action="store", dest='protein_fasta_input', default=None, help='input fasta sequences, in protein format. Corresponding to the -i sequences')208    parser.add_argument('-w', action="store", dest='warning_file_check_input', default=None, help='for check_input there can be quite some warning messages. Use -w to save them to a file')209    parser.add_argument('-d', action="store", dest='database', default=None, help='file containing the database')210    parser.add_argument('-S', action="store", dest='intermediate_al', default=None, help='name of the file for the intermediate alignment')211    parser.add_argument('-C', action="store", dest='intermediate_cross_val', default=None, help='name of the file for the intermediate cross validation results')212    parser.add_argument('-m', action='store', type=int, default=None, dest='min_perc_state', help='Minimum number of mapping states, i.e. 
how many features of the classifier we cover')213    parser.add_argument('-l', action='store_true', dest='long_out', help='Print more columns for the classification pipeline')214    parser.add_argument('-r', action='store_true', dest='keep_all_genes', help='keep all genes when doing the classification of genomes')215    parser.add_argument('-D', action="store", dest='dir_input', default=None, help='input directory')216    parser.add_argument('-T', action="store", dest='file_thresholds', default=None, help='file with the thresholds for the genes in the genome classifier') # basically the minimum score required217    parser.add_argument('-e', action="store", default="l1", dest='penalty_logistic', help='penalty for the logistic regression',choices=['l1','l2','none'])218    parser.add_argument('-E', action="store", default="liblinear", dest='solver_logistic', help='solver for the logistic regression',choices=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])219    parser.add_argument('-G', action="store", dest="marker_genes", default=None, help="Set of identified marker genes in lieu of a genomic sequence")220    parser.add_argument('--version', action='version', version='%(prog)s {0} on python {1}'.format(tool_version, sys.version.split()[0]))221    args = parser.parse_args()222    # --------------------------------------------------------------------------223    # TEST routine224    # --------------------------------------------------------------------------225    if args.command == 'test':226        popenCMD = "stag_test"227        child = subprocess.Popen(popenCMD)228        child.communicate()229        rc = child.wait()230        return(rc)231    # --------------------------------------------------------------------------232    # set defaults for the parameters233    # --------------------------------------------------------------------------234    if args.command == 'correct_seq':235        if (args.min_perc_state is None): args.min_perc_state = 5236    
else:237        if (args.min_perc_state is None): args.min_perc_state = 0238    if args.threads < 1:239        handle_error("number of threads (-t) is less than 1", None)240    if args.min_perc_state < 0 or args.min_perc_state > 100:241        handle_error("-m should be between 0 and 100. It represents the percentage\n"242                     "of internal states covered by the sequence (i.e. the number of features).", None)243    # --------------------------------------------------------------------------244    # ALIGN routine245    # --------------------------------------------------------------------------246    error = ""247    if args.command == 'align':248        # check that '-i' and '-a' have been provided249        if not args.fasta_input:250            error = "missing <seqfile> (-i)"251        elif not args.template_al:252            error = "missing <hmmfile>/<cmfile> (-a)"253        if error:254            handle_error(error, print_menu_align)255        # check that '-s' and '-a' are files256        check_file_exists(args.fasta_input, isfasta = True)257        check_file_exists(args.template_al, isfasta = False)258        # if -p is provided, then check that it is a fasta file259        if args.protein_fasta_input:260            check_file_exists(args.protein_fasta_input, isfasta = True)261        # call the function262        if args.output is None:263            for ali in align.align_generator(args.fasta_input, args.protein_fasta_input, args.template_al,264                                             args.use_cm_align, args.threads, args.verbose, False, args.min_perc_state):265                print(ali)266        else:267            align.align_file(args.fasta_input, args.protein_fasta_input, args.template_al, args.use_cm_align, args.threads,268                             args.verbose, args.output, args.min_perc_state)269    # --------------------------------------------------------------------------270    # CREATE_DB routine271    # 
--------------------------------------------------------------------------272    elif args.command == 'create_db':273        if not args.aligned_sequences:274            # check that '-s' has been provided (alignment produced by stag align)275            error = "missing <aligned_file> (-s)"276        elif not args.taxonomy:277            # check that '-x' has been provided (taxonomy file)278            error = "missing <taxonomy_file> (-x)"279        elif not args.template_al:280            # check that the hmm file is provided281            error = "missing <hmmfile>/<cmfile> (-a)"282        elif not args.output:283            # check that output is set284            error = "missing <output_DB> (-o)"285        if error:286            handle_error(error, print_menu_create_db)287        # check that '-s' and '-a' are files288        check_file_exists(args.aligned_sequences, isfasta = False)289        check_file_exists(args.taxonomy, isfasta = False)290        check_file_exists(args.template_al, isfasta = False)291        if not args.force_rewrite:292            check_file_doesnt_exists(args.output)293        # call the function to create the database294        create_db.create_db(args.aligned_sequences, args.taxonomy, args.verbose, args.output, args.use_cm_align,295                            args.template_al, args.intermediate_cross_val, args.protein_fasta_input,296                            args.penalty_logistic, args.solver_logistic, procs=args.threads)297    # --------------------------------------------------------------------------298    # TRAIN routine299    # --------------------------------------------------------------------------300    elif args.command == 'train':301        # check that '-i' and '-a' have been provided302        if not args.fasta_input:303            error = "missing <seqfile> (-i)"304        elif not args.template_al:305            error = "missing <hmmfile>/<cmfile> (-a)"306        elif not args.taxonomy:307            # check that 
'-x' has been provided (taxonomy file)308            error = "missing <taxonomy_file> (-x)"309        elif not args.output:310            # check that output is set311            error = "missing <output_DB> (-o)"312        if error:313            handle_error(error, print_menu_train)314        # check that '-s' and '-a' are files315        check_file_exists(args.fasta_input, isfasta = True)316        check_file_exists(args.template_al, isfasta = False)317        # if -p is provided, then check that it is a fasta file318        if args.protein_fasta_input:319            check_file_exists(args.protein_fasta_input, isfasta = True)320        check_file_exists(args.taxonomy, isfasta = False)321        if not args.force_rewrite:322            check_file_doesnt_exists(args.output)323        # FIRST: ALIGN ---------------------------------------------------------324        # we create a temporary file that will contain the alignments325        al_file = tempfile.NamedTemporaryFile(delete=False, mode="w")326        os.chmod(al_file.name, 0o644)327        # call the function328        align.align_file(args.fasta_input, args.protein_fasta_input, args.template_al, args.use_cm_align,329                         args.threads, args.verbose, al_file.name, args.min_perc_state)330        # SECOND: CREATE_DB ----------------------------------------------------331        # call the function to create the database332        create_db.create_db(al_file.name, args.taxonomy, args.verbose, args.output, args.use_cm_align,333                            args.template_al, args.intermediate_cross_val, args.protein_fasta_input,334                            args.penalty_logistic, args.solver_logistic, procs=args.threads)335        # what to do with intermediate alignment -------------------------------336        if not args.intermediate_al:337            # remove it338            os.remove(al_file.name)339        else:340            # save it341            shutil.move(al_file.name, 
args.intermediate_al)342    # --------------------------------------------------------------------------343    # CLASSIFY routine344    # --------------------------------------------------------------------------345    elif args.command == 'classify':346        # check that '-i' has been provided (alignment produced by stag align)347        if not args.fasta_input and not args.aligned_sequences:348            error = "missing <fasta_seqs> (-i) or <aligned_seq> (-s)"349        elif not args.database:350            # check that '-d' has been provided (taxonomy file)351            error = "missing <database> (-d)"352        if error:353            handle_error(error, print_menu_classify)354        # check that they are files355        if args.fasta_input:356            check_file_exists(args.fasta_input, isfasta = True)357        check_file_exists(args.database, isfasta = False)358        # if -p is provided, then check that it is a fasta file359        if args.protein_fasta_input:360            check_file_exists(args.protein_fasta_input, isfasta = True)361        # if -S is provided, we remove the file if it exists, since in the362        # function it appends only363        if args.intermediate_al:364            if os.path.isfile(args.intermediate_al):365                os.remove(args.intermediate_al)366        # call the function367        classify.classify(args.database, fasta_input=args.fasta_input, protein_fasta_input=args.protein_fasta_input, 368                          verbose=args.verbose, threads=args.threads, output=args.output, long_out=args.long_out, 369                          current_tool_version=tool_version, aligned_sequences=args.aligned_sequences,370                          save_ali_to_file=args.intermediate_al, min_perc_state=args.min_perc_state)371    # --------------------------------------------------------------------------372    # CHECK_INPUT routine373    # --------------------------------------------------------------------------374    
elif args.command == 'check_input':375        if not args.fasta_input:376            error = "missing <fasta_seqs> (-i)"377        elif not args.taxonomy:378            error = "missing <taxonomy_file> (-x)"379        elif not args.template_al:380            error = "missing <hmmfile>/<cmfile> (-a)"381        if error:382            handle_error(error, print_menu_check_input)383        check_create_db_input_files.check_input_files(args.fasta_input, args.protein_fasta_input, args.taxonomy,384                                                      args.template_al, args.use_cm_align, args.warning_file_check_input)385    # --------------------------------------------------------------------------386    # CORRECT_SEQ routine387    # --------------------------------------------------------------------------388    # check if the sequences are in correct orientation, if they are not, then389    # take reverse complement. Save to -o all the seqeunces is correct order390    elif args.command == 'correct_seq':391        # check that '-i' and '-a' have been provided392        if not args.fasta_input:393            error = "missing <seqfile> (-i)"394        elif not args.template_al:395            error = "missing <hmmfile>/<cmfile> (-a)"396        if error:397            handle_error(error, print_menu_correct_seq)398        # check that '-s' and '-a' are files399        check_file_exists(args.fasta_input, isfasta = True)400        check_file_exists(args.template_al, isfasta = False)401        # call the function402        correct_seq.correct_seq(args.fasta_input, args.template_al, args.use_cm_align, args.threads, args.verbose,403                                args.min_perc_state, args.output)404    # --------------------------------------------------------------------------405    # CONVERT_ALI routine406    # --------------------------------------------------------------------------407    elif args.command == 'convert_ali':408        # check that '-i' and '-o' have been 
provided409        if not args.fasta_input:410            error = "missing <file_in> (-i)"411        elif not args.output:412            error = "missing <file_out> (-o)"413        if error:414            handle_error(error, print_menu_convert_ali)415        # check that '-i' is a file416        check_file_exists(args.fasta_input, isfasta = False)417        # call function418        convert_ali.convert_ali(args.fasta_input, args.output, args.verbose)419    # --------------------------------------------------------------------------420    # UNZIP_db routine421    # --------------------------------------------------------------------------422    elif args.command == 'unzip_db':423        # check that '-d' and '-o' have been provided424        if not args.database:425            error = "missing <database> (-d)"426        elif not args.output:427            error = "missing <dir_out> (-o)"428        if error:429            handle_error(error, print_menu_unzip_db)430        # check that '-d' is a file431        check_file_exists(args.database, isfasta = False)432        # call function433        unzip_db.unzip_db(args.database, args.verbose, args.output)434    # --------------------------------------------------------------------------435    # TRAIN_GENOME routine436    # --------------------------------------------------------------------------437    # We want to have a database for classifying genomes. 
It will contains the438    # calssifiers for the seingle genes439    # Input: single gene databases440    # Output: a database file (hdf5) that can be used by stag classify_genome441    elif args.command == 'train_genome':442        # check that parameters are set ----------------------------------------443        if not args.output:444            error = "missing <output_DB> (-o)"445        elif not args.fasta_input:446            error = "missing <list_gene_DBs> (-i)"447        elif not args.file_thresholds:448            error = "missing <gene_thresholds> (-T)"449        elif not args.intermediate_cross_val:450            error = "missing <concat_genes_DB> (-C)"451        if error:452            handle_error(error, print_menu_train_genome)453        # call the function454        train_genome.train_genome(args.output, args.fasta_input, args.file_thresholds,455                                  args.threads, args.verbose, args.intermediate_cross_val)456    # --------------------------------------------------------------------------457    # CLASSIFY_GENOME routine458    # --------------------------------------------------------------------------459    if args.command == 'classify_genome':460        # check input461        if not args.database:462            error = "missing <database> (-d)"463        elif not any((args.fasta_input, args.dir_input, args.marker_genes)):464            error = "you need to provide at least -i, -D, or -G."465        elif sum(map(bool, (args.fasta_input, args.dir_input, args.marker_genes))) != 1:466            error = "options -i, -D, and -G are mutually exclusive"467        elif args.dir_input and not os.path.isdir(args.dir_input):468            error = "-D is not a directory."469        elif args.marker_genes and not os.path.isfile(args.marker_genes):470            error = "-G is not a valid file."471        elif not args.output:472            # check that output dir is defined473            error = "missing output directory (-o)"474    
    if error:475            handle_error(error, print_menu_classify_genome)476        # find files to classify477        marker_genes, list_files = list(), list()478        if args.fasta_input:479            check_file_exists(args.fasta_input, isfasta = True)480            list_files.append(args.fasta_input)481        elif args.marker_genes:482            marker_genes = [args.marker_genes]483        else:484            for f in os.listdir(args.dir_input):485                f = os.path.join(args.dir_input, f)486                try:487                    if os.path.isfile(f) and open(f).read(1)[0] == ">":488                        list_files.append(f)489                except Exception as e:490                    if args.verbose > 1:491                        sys.stderr.write("[W::main] Warning: ")492                        sys.stderr.write("Cannot open file: {}\n".format(f))493            if not list_files:494                handle_error("no fasta files found in the directory.", None)495            sys.stderr.write(" Found "+str(len(list_files))+" fasta files\n")496        if os.path.isdir(args.output):497            if args.force_rewrite:498                shutil.rmtree(args.output)499            else:500                handle_error("output directory (-o {}) exists already.".format(args.output), None)501        # create output dir502        try:503            pathlib.Path(args.output).mkdir(exist_ok=True, parents=True)504        except:505            handle_error("creating the output directory (-o {}).".format(args.output), None)506        if list_files:507            from stag.classify_genome import validate_genome_files508            validate_genome_files(list_files)509        # call the function510        classify_genome.classify_genome(args.database, genome_files=list_files, marker_genes=marker_genes,511                                        verbose=args.verbose, threads=args.threads,512                                        output=args.output, 
long_out=args.long_out, keep_all_genes=args.keep_all_genes)513    return None        # success514#-------------------------------- run main -------------------------------------515if __name__ == '__main__':...

stag_test.py

Source:stag_test.py

#!/usr/bin/env python

# ============================================================================ #
# stag_test Run stag tests
#
# Author: Alessio Milanese (milanese@embl.de)
#
# ============================================================================ #

import time
import os
import sys
import tempfile
import subprocess
import shlex
import errno
import importlib
import pkg_resources
import urllib.request
import hashlib
from pathlib import Path
import shutil

from .helpers import bco, is_tool, is_tool_and_return0

TEST_DATA_PATH = pkg_resources.resource_filename("stag", "test")

# Bullet printed in front of every check label. Written as an escape so the
# glyph (BLACK SQUARE) cannot be mangled again by a wrong source encoding.
_BULLET = "  \u25a0 "

# Width of the horizontal rule used by the banners.
_BANNER_WIDTH = 78


# ------------------------------------------------------------------------------
# small reporting helpers (all output goes to stderr)
# ------------------------------------------------------------------------------
def _write_label(label):
    """Print the bullet and *label* without a newline and flush stderr."""
    sys.stderr.write(_BULLET + label)
    sys.stderr.flush()


def _write_status(colour, text, suffix=""):
    """Print a bold, coloured status word followed by *suffix* and a newline."""
    sys.stderr.write(f"{colour}{bco.Bold} {text}{bco.ResetAll}{suffix}\n")


def _write_section(title):
    """Print a numbered section title."""
    sys.stderr.write(f"{bco.Cyan}{bco.Bold}{title}{bco.ResetAll}\n")


def _banner(title, left_pad):
    """Print a three-line boxed banner with *title* after *left_pad* spaces."""
    rule = " " + "-" * _BANNER_WIDTH
    right_pad = _BANNER_WIDTH - left_pad - len(title)
    padded_title = " " * left_pad + title + " " * right_pad
    sys.stderr.write(f"{bco.Blue}{bco.Bold}{rule}{bco.ResetAll}\n")
    sys.stderr.write(f"{bco.Blue}{bco.Bold}|{bco.Green}{padded_title}{bco.Blue}|{bco.ResetAll}\n")
    sys.stderr.write(f"{bco.Blue}{bco.Bold}{rule}{bco.ResetAll}\n")


def _fail_with_check(message, location):
    """Print *message* and a hint to inspect *location*, then exit with status 1."""
    _write_status(bco.Red, message)
    sys.stderr.write(f"{bco.Red}{bco.Bold} Check {location}{bco.ResetAll}\n")
    sys.exit(1)


# check md5
def md5(fname):
    """Return the hexadecimal MD5 digest of the file *fname*.

    The file is read in 4096-byte chunks so that large archives do not have
    to fit in memory.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def _report_md5(filename, expected_md5):
    """Print whether *filename* has digest *expected_md5* and return the result.

    A file that cannot be read (for instance because the download failed) is
    reported as "different" instead of aborting with a traceback.
    """
    try:
        matches = md5(filename) == expected_md5
    except OSError:
        matches = False
    if matches:
        _write_status(bco.Green, "correct")
    else:
        _write_status(bco.Yellow, "different")
    return matches


# download a file if it's not there
def download_file(url, filename):
    """Download *url* to *filename* and print "correct" or "error".

    Download failures are reported on stderr and never raised to the caller.
    """
    try:
        urllib.request.urlretrieve(url, filename)
    except Exception:
        # best effort: the caller detects a broken download through the md5
        _write_status(bco.Yellow, "error")
    else:
        _write_status(bco.Green, "correct")


# function to download and decompress a tar.gz
def download_and_checkmd5_and_decompress(url, filename, md5_db, destination):
    """Fetch a ``.tar.gz`` archive, verify its md5 and unpack it.

    Parameters:
        url: where to download the archive from.
        filename: local path of the archive; must end with ``.tar.gz``.
        md5_db: expected md5 hex digest of the archive.
        destination: directory passed to ``tar -C``.

    Returns:
        ``filename`` without the ``.tar.gz`` suffix plus a trailing ``/``,
        i.e. the directory the archive is expected to unpack into.
    """
    # ".tar.gz" is 7 characters long
    unpacked_dir = filename[0:-7]
    # we remove a dir if it exist already
    shutil.rmtree(unpacked_dir, ignore_errors=True)

    # check if the file is already downloaded
    if Path(filename).is_file():
        sys.stderr.write(_BULLET + "already downloaded\n")
    else:
        _write_label("download file:     ")
        download_file(url, filename)

    # check md5, and retry the download once if it does not match
    _write_label("check md5:            ")
    if not _report_md5(filename, md5_db):
        _write_label("Re-download file:     ")
        download_file(url, filename)
        _write_label("check md5:            ")
        _report_md5(filename, md5_db)

    # decompress
    _write_label("Unzip file:           ")
    # an argument list (not a split string) keeps paths with spaces intact
    extract_cmd = ["tar", "-zxvf", filename, "-C", destination]
    try:
        process = subprocess.run(extract_cmd,
                                 stdout=subprocess.DEVNULL,
                                 stderr=subprocess.DEVNULL)
    except OSError:
        # tar could not be started at all
        _write_status(bco.Yellow, "error")
    else:
        if process.returncode:
            _write_status(bco.Yellow, "error")
        else:
            _write_status(bco.Green, "correct")

    # we return the resulting directory, which should have the same name as
    # the file downloaded minus ".tar.gz"
    return unpacked_dir + "/"


# ------------------------------------------------------------------------------
# helpers used by main
# ------------------------------------------------------------------------------
def _check_tool(label, tool, missing_msg):
    """Report whether the executable *tool* is available; return True if it is."""
    _write_label(label)
    if is_tool(tool):
        _write_status(bco.Green, "correct")
        return True
    _write_status(bco.Yellow, missing_msg, suffix="\n")
    return False


def _check_library(label, module_name):
    """Report whether *module_name* can be imported; return True if it can."""
    _write_label(label)
    try:
        importlib.import_module(module_name)
    except ImportError:
        _write_status(bco.Yellow, f"WARNING. {module_name} is missing", suffix="\n")
        return False
    _write_status(bco.Green, "correct")
    return True


def _run_stag(arguments):
    """Run ``stag`` with the list *arguments* and report status and runtime.

    Exits the program with status 1 when the command returns a non-zero code.
    """
    t0 = time.time()
    process = subprocess.run(["stag", *arguments])
    runtime = time.time() - t0
    if process.returncode:
        _write_status(bco.Red, "Error", suffix=f" ({runtime:.3f}s)")
        sys.exit(1)
    _write_status(bco.Green, "correct", suffix=f" ({runtime:.3f}s)")


def _new_temp_path():
    """Create an empty temporary file and return its path.

    Only the name is needed (stag writes to it), so the handle is closed
    right away instead of being left open.
    """
    handle = tempfile.NamedTemporaryFile(delete=False, mode="w")
    handle.close()
    return handle.name


def _read_two_columns(path, skip_header):
    """Return the first two tab-separated columns of *path* as (key, value) pairs."""
    pairs = []
    with open(path, "r") as handle:
        if skip_header:
            handle.readline()
        for line in handle:
            vals = line.rstrip().split("\t")
            pairs.append((vals[0], vals[1]))
    return pairs


# ------------------------------------------------------------------------------
# MAIN
# ------------------------------------------------------------------------------
def main(argv=None):
    """Run the short and the long stag self-test, reporting on stderr.

    *argv* is accepted for interface compatibility and is not used.
    Returns None on success; exits with status 1 as soon as a step fails.
    """
    _banner("TEST STAG", 36)
    error_found = False

    # CHECK TOOLS ==============================================================
    _write_section("1-- Tools and versions:")

    # check python version -----------------------------------------------------
    _write_label("python:       ")
    if sys.version_info >= (3, 0, 0):
        _write_status(bco.Green, "correct")
    else:
        _write_status(bco.Yellow, "WARNING: python2 is not supported", suffix="\n")
        error_found = True

    # check hmmer, Easel and seqtk ---------------------------------------------
    required_tools = (
        ("hmmalign:     ", "hmmalign", "WARNING. HMMER3 is not in the path"),
        ("esl-reformat: ", "esl-reformat", "WARNING. EASEL is not in the path"),
        ("seqtk:        ", "seqtk", "WARNING. seqtk is not in the path"),
    )
    for label, tool, missing_msg in required_tools:
        if not _check_tool(label, tool, missing_msg):
            error_found = True

    # Python libraries ---------------------------------------------------------
    required_libraries = (
        ("(L)numpy:     ", "numpy"),
        ("(L)pandas:    ", "pandas"),
        ("(L)sklearn:   ", "sklearn"),
        ("(L)h5py:      ", "h5py"),
    )
    for label, module_name in required_libraries:
        if not _check_library(label, module_name):
            error_found = True

    # TRY TO RUN STAG ==========================================================
    _write_section("2-- Run stag:")

    _write_label("train:      ") #----------------------------------------------
    seq_file = os.path.join(TEST_DATA_PATH, "sequences.fasta")
    tax_file = os.path.join(TEST_DATA_PATH, "sequences.taxonomy")
    hmm_file = os.path.join(TEST_DATA_PATH, "gene.hmm")
    temp_db = _new_temp_path()
    _run_stag(["train", "-f", "-o", temp_db, "-i", seq_file, "-x", tax_file, "-a", hmm_file])

    _write_label("classify:   ") #----------------------------------------------
    temp_res = _new_temp_path()
    _run_stag(["classify", "-v", "1", "-d", temp_db, "-i", seq_file, "-o", temp_res])

    # remove temp file
    os.remove(temp_db + ".log")
    os.remove(temp_db)

    # CHECK THE RESULTING FILE =================================================
    _write_section("3-- Check result of the classification:")
    _write_label("taxonomy of classified sequences: ")

    # the taxonomy file has no header line
    correct_tax = dict(_read_two_columns(tax_file, skip_header=False))

    pred_tax = dict()
    with open(temp_res, "r") as result_handle:
        result_handle.readline() # remove header
        for line in result_handle:
            vals = line.rstrip().split("\t")
            if len(vals) < 2:
                sys.stderr.write(f"{bco.Red}{bco.Bold} Error: less than two values ("+str(vals)+f"){bco.ResetAll}\n")
                os.remove(temp_res)
                sys.exit(1)
            pred_tax[vals[0]] = vals[1]
    # remove temp file
    os.remove(temp_res)

    # let's check the values
    if set(pred_tax) != set(correct_tax):
        _write_status(bco.Red, "Error: different number of predicted genes")
        print(len(pred_tax), len(correct_tax), file=sys.stderr)
        print(*pred_tax.keys(), sep="\n")
        print("****")
        print(*correct_tax.keys(), sep="\n")
        sys.exit(1)

    # if we arrive here, we have the same set of predicted genes
    # let's check the predicted taxonomies
    error_flag = False
    for gene, predicted in pred_tax.items():
        if predicted != correct_tax[gene]:
            error_flag = True
            sys.stderr.write(f"{bco.Red}{bco.Bold} Error: different taxonomy for "+gene+"(correct:'"+correct_tax[gene]+"', predicted:'"+predicted+"')"+f"{bco.ResetAll}\n")
    if not error_flag:
        _write_status(bco.Green, "correct")

    if error_found or error_flag:
        sys.exit(1)

    ############################################################################
    _banner("LONG STAG TEST", 35)

    # long test part 1: test building genome DB --------------------------------
    _write_section("1-- Build genome database:")
    # prepare data
    link_db = "https://zenodo.org/record/4626959/files/train_genome_files.tar.gz"
    db_name = os.path.join(TEST_DATA_PATH, "train_genome_files.tar.gz")
    md5_db = "5ec5a527d25cc6d1f11a8ec50cd252a7"
    this_dir = download_and_checkmd5_and_decompress(link_db, db_name, md5_db, TEST_DATA_PATH)

    # train genome
    _write_label("train genome DB:      ")
    gene_files = ",".join(this_dir + cog for cog in ("COG0012", "COG0016", "COG0018"))
    merged_db = this_dir + "genes_ali.stagDB"
    thresholds = this_dir + "gene_thresholds"
    result_genome_DB = this_dir + "TEST_DB.stagDB"
    _run_stag(["train_genome", "-v", "1", "-o", result_genome_DB, "-i", gene_files,
               "-T", thresholds, "-C", merged_db])

    # long test part 2: test classify genome -----------------------------------
    _write_section("2-- Test genome classification:")
    # prepare data
    link_db = "https://zenodo.org/record/4626959/files/classify_genome_files.tar.gz"
    db_name = os.path.join(TEST_DATA_PATH, "classify_genome_files.tar.gz")
    md5_db = "819cc77d463a797a330d8d1d9437feca"
    this_dir = download_and_checkmd5_and_decompress(link_db, db_name, md5_db, TEST_DATA_PATH)

    # classify the genomes
    _write_label("classify 2 genomes:   ")
    genome_files = this_dir + "genomes"
    result = this_dir + "RESULT_TEMP"
    _run_stag(["classify_genome", "-v", "1", "-o", result, "-d", result_genome_DB,
               "-D", genome_files])

    # check result of the classification ---------------------------------------
    _write_label("check result:         ")
    # correct annotation
    correct_classification = dict(
        _read_two_columns(this_dir + "CORRECT_ASSIGNMENT/genome_annotation", skip_header=True))
    # for every expected genome, remember if it shows up in the result
    all_genomes = dict.fromkeys(correct_classification, False)
    # this annotation (trailing space kept as in the original message)
    annotation_hint = this_dir + "RESULT_TEMP/genome_annotation "
    for genome_path, predicted in _read_two_columns(this_dir + "RESULT_TEMP/genome_annotation",
                                                    skip_header=True):
        # the result lists genome paths, the reference only file names
        genome = genome_path.split("/")[-1]
        all_genomes[genome] = True
        if genome not in correct_classification:
            _fail_with_check("Error, too many lines", annotation_hint)
        if correct_classification[genome] != predicted:
            _fail_with_check("Error, wrong calssification", annotation_hint)
    # check that all the genomes were profiled
    if not all(all_genomes.values()):
        _fail_with_check("Error, some genomes are missing", annotation_hint)
    # if we arrive till here, then it's correct
    _write_status(bco.Green, "correct")

    # long test part 3: test gene train and classification with real data ------
    _write_section("3-- Test real genes:")
    # prepare data
    link_db = "https://zenodo.org/record/4626959/files/test_gene.tar.gz"
    db_name = os.path.join(TEST_DATA_PATH, "test_gene.tar.gz")
    md5_db = "bee91d9dc06fae153502af386c29ca5c"
    this_dir = download_and_checkmd5_and_decompress(link_db, db_name, md5_db, TEST_DATA_PATH)

    # train database
    _write_label("train database:       ")
    seq_file = this_dir + "train.fna"
    protein_file = this_dir + "train.faa"
    tax_file = this_dir + "train.tax"
    hmm_file = this_dir + "COG0012.hmm"
    trained_db = this_dir + "TRAINED_TEMP"
    _run_stag(["train", "-f", "-o", trained_db, "-i", seq_file, "-p", protein_file,
               "-x", tax_file, "-a", hmm_file, "-t", "2"])

    _write_label("classify:             ") #------------------------------------
    res_classification = this_dir + "RES_TEMP"
    seq_file = this_dir + "test.fna"
    protein_file = this_dir + "test.faa"
    _run_stag(["classify", "-v", "1", "-d", trained_db, "-i", seq_file, "-p", protein_file,
               "-o", res_classification])

    # check result of the classification ---------------------------------------
    _write_label("check result:         ")
    # correct annotation
    correct_classification = dict(
        _read_two_columns(this_dir + "test.CORRECT_ASSIGNMENT", skip_header=True))
    all_genomes = dict.fromkeys(correct_classification, False)
    # this annotation
    for gene, predicted in _read_two_columns(res_classification, skip_header=True):
        all_genomes[gene] = True
        if gene not in correct_classification:
            _fail_with_check("Error, too many lines", res_classification)
        # a different prediction is only displayed (yellow), it is not fatal
        if correct_classification[gene] != predicted:
            colour = bco.Yellow
        else:
            colour = bco.LightGreen
        sys.stderr.write(f"\n{colour} Corr: "+correct_classification[gene]+f"{bco.ResetAll}\n")
        sys.stderr.write(f"{colour} Pred: "+predicted+f"{bco.ResetAll}\n")
    # check that all the genes were profiled
    if not all(all_genomes.values()):
        _fail_with_check("Error, some genomes are missing", res_classification)
    # if we arrive till here, then it's correct
    _write_status(bco.Green, "correct")

    return None        # success


#-------------------------------- run main -------------------------------------
if __name__ == '__main__':
    ...  # NOTE(review): body of the entry-point guard is elided in this excerpt

Color.py

Source:Color.py

1class Color_Manager:2    class BG:3        Black = '\033[40m'4        Red = '\033[41m'5        Green = '\033[42m'6        Yellow = '\033[43m'7        Blue = '\033[44m'8        Magenta = '\033[45m'9        Cyan = '\033[46m'10        White = '\033[47m'11        Reset = '\033[49m'12    class FG:13        Black = '\033[30m'14        Red = '\033[31m'15        Green = '\033[32m'16        Yellow = '\033[33m'17        Blue = '\033[34m'18        Magenta = '\033[35m'19        Cyan = '\033[36m'20        White = '\033[37m'21        Reset = '\033[39m'22    class BRIGHT:23        dim = '\033[2m'24        normal = '\033[22m'25    class EFFECT:26        ResetAll = '\033[0m'27        Italic = '\033[3m'28        Underline = '\033[4m'29        Bold = '\033[1m'30    @classmethod31    def FgPrint(cls, str, color):32        if color == 'Black':33            print(cls.FG.Black + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')34        elif color == 'Red':35            print(cls.FG.Red + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')36        elif color == 'Green':37            print(cls.FG.Green + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')38        elif color == 'Yellow':39            print(cls.FG.Yellow + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')40        elif color == 'Blue':41            print(cls.FG.Blue + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')42        elif color == 'Magenta':43            print(cls.FG.Magenta + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')44        elif color == 'Cyan':45            print(cls.FG.Cyan + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')46        elif color == 'White':47            print(cls.FG.White + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')48    @classmethod49    def BgPrint(cls, str, color):50        if color == 'Black':51            print(cls.BG.Black + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')52        elif color == 'Red':53            
print(cls.BG.Red + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')54        elif color == 'Green':55            print(cls.BG.Green + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')56        elif color == 'Yellow':57            print(cls.BG.Yellow + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')58        elif color == 'Blue':59            print(cls.BG.Blue + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')60        elif color == 'Magenta':61            print(cls.BG.Magenta + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')62        elif color == 'Cyan':63            print(cls.BG.Cyan + cls.EFFECT.Bold + str + cls.EFFECT.ResetAll, end = ' ')64        elif color == 'White':...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. From setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios, LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.