Best Python code snippet using autotest_python
gametes_full_archive_gen.py
Source:gametes_full_archive_gen.py  
"""
Author: Ryan Urbanowicz
Created: 11/30/20
Description: Script to apply GAMETES to generate and organize a variety of SNP
simulated models and their corresponding datasets.

Every generator builds a ``java -jar gametes_2.2_dev.jar ...`` command line and
either runs it directly with ``os.system`` or wraps it in a shell script with
``#BSUB`` header lines that is handed to ``bsub``.
"""
import sys
import os
import argparse
import time

#File name of the GAMETES jar, expected in the same folder as this script
_JAR_NAME = 'gametes_2.2_dev.jar'
#Spellings accepted as "true" when a flag arrives as text from the command line
_TRUE_STRINGS = ('true', 't', 'yes', 'y', '1')


def _as_bool(value):
    """Interpret *value* as a boolean.

    Command-line flags arrive as text, and any non-empty string (including
    "False") is truthy in Python, so strings are compared against a list of
    accepted "true" spellings. Non-string values fall back to ``bool(value)``.
    """
    if isinstance(value, str):
        return value.strip().lower() in _TRUE_STRINGS
    return bool(value)


def _ensure_dir(path):
    """Create *path* (and any missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _run_gametes(command, job_tag, job_dest, log_dest, run_parallel):
    """Run one GAMETES command, directly or as a job submitted with ``bsub``.

    command: complete shell command line to execute.
    job_tag: descriptive text embedded in the job script and log file names.
    job_dest: folder receiving the generated ``*_run.sh`` script.
    log_dest: folder named in the script's ``-o`` / ``-e`` header lines.
    run_parallel: bool or "True"/"False" text; false means run the command now.
    """
    if not _as_bool(run_parallel):
        os.system(command)
        return
    #The current timestamp doubles as job name and as file name suffix
    job_ref = str(time.time())
    stem = 'gametes_%s_%s' % (job_tag, job_ref)
    job_path_name = '%s/%s_run.sh' % (job_dest, stem)
    with open(job_path_name, 'w') as sh_file:
        sh_file.write('#!/bin/bash\n')
        sh_file.write('#BSUB -q i2c2_normal\n')
        sh_file.write('#BSUB -J ' + job_ref + '\n')
        sh_file.write('#BSUB -R "rusage[mem=4G]"\n')
        sh_file.write('#BSUB -M 15GB\n')
        sh_file.write('#BSUB -o %s/%s.o\n' % (log_dest, stem))
        sh_file.write('#BSUB -e %s/%s.e\n' % (log_dest, stem))
        sh_file.write(command)
    os.system('bsub < ' + job_path_name)


def _generate_models(locus, heritability, try_count, model_dest, job_dest, log_dest,
                     run_parallel, this_file_path, minor_af=(0.2,), set_k=True,
                     prevalence=0.3, pop_count=100000, quantiles=2):
    """Issue one GAMETES model-generation command per heritability/MAF pair.

    locus: number of loci; the ``-a <maf>`` option is repeated this many times.
    set_k: if True, then the K value (*prevalence*) is specified as a
        constraint and appended to the model file name - if False, then K is
        allowed to vary.
    """
    jar = this_file_path + '/' + _JAR_NAME
    for h in heritability:
        for m in minor_af:
            tag = 'L_%s_H_%s_F_%s' % (locus, h, m)
            model_path_name = model_dest + '/' + tag
            command = 'java -jar %s -M " -h %s' % (jar, h)
            if set_k:
                model_path_name += '_K_%s' % prevalence
                command += ' -p %s' % prevalence
            command += (' -a %s' % m) * locus
            command += ' -o %s.txt" -q %s -p %s -t %s' % (
                model_path_name, quantiles, pop_count, try_count)
            _run_gametes(command, tag, job_dest, log_dest, run_parallel)


def _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path, data_name, locus, heritability,
                       sample_sizes, weights=None, combine_flags='',
                       complement_weight=True, minor_af=(0.2,), prevalence=0.3,
                       attribute_counts=(100,), af_min=0.01, af_max=0.5, replicates=30):
    """Issue one GAMETES dataset-generation command per parameter combination.

    data_name: name of the dataset folder created under the archive, also used
        as prefix of every generated dataset.
    sample_sizes: total sample sizes; assumes balanced datasets (each size is
        halved and the half is passed to both ``-s`` and ``-w``).
    weights: None for a single-model dataset. Otherwise each weight w yields a
        command that names the model file twice, the first with ``-w w`` and
        the second with ``-w (100 - w)`` (or ``-w w`` again when
        *complement_weight* is False).
    combine_flags: text placed at the start of the quoted ``-D`` argument
        (must carry its own trailing space when non-empty).
    """
    data_dir = '%s/%s/%s' % (output_path, archive_name, data_name)
    _ensure_dir(data_dir)
    jar = this_file_path + '/' + _JAR_NAME
    weight_options = [None] if weights is None else weights
    for n in attribute_counts:
        for s in sample_sizes:
            half = s // 2
            for h in heritability:
                for m in minor_af:
                    model_name = 'L_%s_H_%s_F_%s_K_%s' % (locus, h, m, prevalence)
                    model_file = '%s/%s_Models.txt' % (model_dest, model_name)
                    for w in weight_options:
                        if w is None:
                            prefix = data_name
                            inputs = ' -i ' + model_file
                        else:
                            second = 100 - w if complement_weight else w
                            prefix = '%s_W_%s' % (data_name, w)
                            inputs = ' -i %s -w %s -i %s -w %s' % (
                                model_file, w, model_file, second)
                        gen_data_name = '%s/%s_A_%s_S_%s_%s' % (
                            data_dir, prefix, n, s, model_name)
                        command = 'java -jar %s%s -D "%s-n %s -x %s -a %s -s %s -w %s -r %s -o %s"' % (
                            jar, inputs, combine_flags, af_min, af_max, n,
                            half, half, replicates, gen_data_name)
                        job_tag = '%s_A_%s_S_%s_H_%s_F_%s' % (prefix, n, s, h, m)
                        _run_gametes(command, job_tag, job_dest, log_dest, run_parallel)


def main(argv):
    """Parse the command line, create the output folders and run the generators.

    argv: full argument vector; argv[0] (the program name) is skipped.
    With ``--use model`` the three model generators run; with ``--use data``
    the dataset generators run; any other value only prints a message.
    """
    #Parse arguments
    parser = argparse.ArgumentParser(description="Generate GAMETES models and datasets")
    parser.add_argument('--output-path', dest='output_path', type=str, required=True,
                        help='path to output directory')
    parser.add_argument('--archive-name', dest='archive_name', type=str, required=True,
                        help='name of archive output folder (no spaces)')
    parser.add_argument('--run-parallel', dest='run_parallel', type=str, default="True",
                        help='"True" to submit every command as a job via bsub, '
                             '"False" to run the commands directly')
    parser.add_argument('--use', dest='use', type=str, default='model',
                        help='"model" (default) to generate models, "data" to generate datasets')
    options = parser.parse_args(argv[1:])
    output_path = options.output_path
    archive_name = options.archive_name
    #The flag is text; convert it so that "False" really disables job submission
    run_parallel = _as_bool(options.run_parallel)
    use = options.use

    this_file_path = os.path.dirname(os.path.realpath(__file__))
    model_dest = output_path + '/' + archive_name + '/models'
    job_dest = output_path + '/temporary' + '/jobs'
    log_dest = output_path + '/temporary' + '/logs'
    #Create folders (parents are created as needed)
    for folder in (model_dest, job_dest, log_dest):
        _ensure_dir(folder)

    if use == 'model':
        generators = (
            univariate_core_model,          #core main effect models
            epistasis_2_locus_core_model,   #core 2-way epistasis models
            epistasis_3_locus_model,        #3-way epistasis models
        )
    elif use == 'data':
        generators = (
            univariate_core_data,                  #core main effect data
            epistasis_2_locus_core_data,           #core epistasis data
            epistasis_3_locus_data,                #3-way epistasis data
            epistasis_2_locus_hetero_data,         #heterogeneous data (2 subgroups of 2-way epistasis)
            epistasis_2_locus_additive_data,       #additive data (2 additively combined 2-way epistasis models, yielding 'impure' epistasis)
            univariate_2_locus_hetero_data,        #heterogeneous data (2 subgroups of univariate effects)
            univariate_2_locus_additive_data,      #additive data (2 subgroups of univariate effects)
            univariate_4_locus_hetero_data,        #heterogeneous data (4 subgroups of univariate effects)
            univariate_4_locus_additive_data,      #additive data (4 subgroups of univariate effects)
            epistasis_2_locus_imbalanced_data,     #imbalanced dataset (with 2-way epistasis)
            epistasis_2_locus_quantitative_data,   #continuous endpoint data (with 2-way epistasis)
            epistasis_2_locus_numfeatures_data,    #increasing feature count datasets (with 2-way epistasis)
        )
    else:
        print("GAMETES use not recognized.")
        return
    for generate in generators:
        generate(output_path, archive_name, model_dest, job_dest, log_dest, run_parallel, this_file_path)


def univariate_core_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate the 1-locus models for heritabilities 0.05, 0.1, 0.2 and 0.4."""
    _generate_models(1, [0.05, 0.1, 0.2, 0.4], 10000000,
                     model_dest, job_dest, log_dest, run_parallel, this_file_path)


def epistasis_2_locus_core_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate the 2-locus models for heritabilities 0.05, 0.1, 0.2 and 0.4."""
    _generate_models(2, [0.05, 0.1, 0.2, 0.4], 10000000,
                     model_dest, job_dest, log_dest, run_parallel, this_file_path)


def epistasis_3_locus_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate the 3-locus model (heritability 0.2) with a ten times larger ``-t`` value."""
    _generate_models(3, [0.2], 100000000,
                     model_dest, job_dest, log_dest, run_parallel, this_file_path)


def univariate_core_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 'gametes_univariate' datasets from the 1-locus models."""
    _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path,
                       data_name='gametes_univariate', locus=1,
                       heritability=[0.05, 0.1, 0.2, 0.4],
                       sample_sizes=[200, 400, 800, 1600])


def epistasis_2_locus_core_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 'gametes_2way_epistasis' datasets from the 2-locus models."""
    _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path,
                       data_name='gametes_2way_epistasis', locus=2,
                       heritability=[0.05, 0.1, 0.2, 0.4],
                       sample_sizes=[200, 400, 800, 1600])


def epistasis_3_locus_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 'gametes_3way_epistasis' datasets from the 3-locus model."""
    _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path,
                       data_name='gametes_3way_epistasis', locus=3,
                       heritability=[0.2], sample_sizes=[1600])


def epistasis_2_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 'gametes_2way_epi_2het' datasets: the 2-locus model used twice
    with weights w and 100-w (w = 50, 75) and ``-h heterogeneous -b``."""
    _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path,
                       data_name='gametes_2way_epi_2het', locus=2,
                       heritability=[0.4], sample_sizes=[1600],
                       weights=[50, 75], combine_flags='-h heterogeneous -b ')


def epistasis_2_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 'gametes_2way_epi_2add' datasets: the 2-locus model used twice
    with weights w and 100-w (w = 50, 75) and ``-h hierarchical``."""
    _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path,
                       data_name='gametes_2way_epi_2add', locus=2,
                       heritability=[0.4], sample_sizes=[1600],
                       weights=[50, 75], combine_flags='-h hierarchical ')


def univariate_2_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 'gametes_uni_2het' datasets: the 1-locus model used twice with
    weight 50 each and ``-h heterogeneous -b``."""
    #NOTE(review): unlike the 2-way variants this passes the same weight w
    #twice rather than w and 100-w; identical for the only weight used (50).
    _generate_datasets(output_path, archive_name, model_dest, job_dest, log_dest,
                       run_parallel, this_file_path,
                       data_name='gametes_uni_2het', locus=1,
                       heritability=[0.4], sample_sizes=[1600],
                       weights=[50], combine_flags='-h heterogeneous -b ',
                       complement_weight=False)


#NOTE(review): the definition below continues past the end of the reviewed
#excerpt; its visible statements are kept exactly as they were.
def univariate_2_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    #Model parameters needed
    locus = 1
    heritability = [0.4]
    minorAF = [0.2]
    K = 0.3  #population prevelance
    #Define dataset parameters
    data_name = 'gametes_uni_2add'
    samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)
    numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]
    weight = [50]  # have to do the math for both ratio, X and 100-X
    AF_Min = 0.01
    AF_Max = 0.5
    replicates = 30 #100
    #Make dataset folder
    if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):
        os.mkdir(output_path+'/'+archive_name+'/'+data_name)
    #Generate datasets and folders
    for n in numberofattributes:
        for s in samplesize:
            for h in heritability:
                for m in minorAF:
                    modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)
                    modelFile = model_dest+'/'+modelName+"_Models.txt"
                    for w in weight:
                        genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)
                #Create gametes run command485                        filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h hierarchical -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'486                        if run_parallel:487                            job_ref = str(time.time())488                            job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'489                            sh_file = open(job_path_name,'w')490                            sh_file.write('#!/bin/bash\n')491                            sh_file.write('#BSUB -q i2c2_normal'+'\n')492                            sh_file.write('#BSUB -J '+job_ref+'\n')493                            sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')494                            sh_file.write('#BSUB -M 15GB'+'\n')495                            sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')496                            sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')497                            sh_file.write(filewrite)498                            sh_file.close()499                            os.system('bsub < '+job_path_name)500                            pass501                        else:502                            os.system(filewrite)503def univariate_4_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):504    #Model parameters needed505    locus = 1506    heritability = [0.4]507    minorAF = [0.2]508    K = 0.3  #population prevelance509    #Define dataset parameters510    data_name = 'gametes_uni_4het'511    
samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)512    numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]513    weight = [50]  # have to do the math for both ratio, X and 100-X514    AF_Min = 0.01515    AF_Max = 0.5516    replicates = 30 #100517    #Make dataset folder518    if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):519        os.mkdir(output_path+'/'+archive_name+'/'+data_name)520    #Generate datasets and folders521    for n in numberofattributes:522        for s in samplesize:523            for h in heritability:524                for m in minorAF:525                    modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)526                    modelFile = model_dest+'/'+modelName+"_Models.txt"527                    for w in weight:528                        genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)529                        #Create gametes run command530                        filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h heterogeneous -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'531                        if run_parallel:532                            job_ref = str(time.time())533                            job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'534                            sh_file = open(job_path_name,'w')535                            sh_file.write('#!/bin/bash\n')536                            sh_file.write('#BSUB -q i2c2_normal'+'\n')537                            sh_file.write('#BSUB -J '+job_ref+'\n')538                    
        sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')539                            sh_file.write('#BSUB -M 15GB'+'\n')540                            sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')541                            sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')542                            sh_file.write(filewrite)543                            sh_file.close()544                            os.system('bsub < '+job_path_name)545                            pass546                        else:547                            os.system(filewrite)548def univariate_4_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):549    #Model parameters needed550    locus = 1551    heritability = [0.4]552    minorAF = [0.2]553    K = 0.3  #population prevelance554    #Define dataset parameters555    data_name = 'gametes_uni_4add'556    samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)557    numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]558    weight = [50]  # have to do the math for both ratio, X and 100-X559    AF_Min = 0.01560    AF_Max = 0.5561    replicates = 30 #100562    #Make dataset folder563    if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):564        os.mkdir(output_path+'/'+archive_name+'/'+data_name)565    #Generate datasets and folders566    for n in numberofattributes:567        for s in samplesize:568            for h in heritability:569                for m in minorAF:570                    modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)571                    modelFile = model_dest+'/'+modelName+"_Models.txt"572                    for w in weight:573                        genDataName = 
output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)574                        #Create gametes run command575                        filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h hierarchical -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'576                        if run_parallel:577                            job_ref = str(time.time())578                            job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'579                            sh_file = open(job_path_name,'w')580                            sh_file.write('#!/bin/bash\n')581                            sh_file.write('#BSUB -q i2c2_normal'+'\n')582                            sh_file.write('#BSUB -J '+job_ref+'\n')583                            sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')584                            sh_file.write('#BSUB -M 15GB'+'\n')585                            sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')586                            sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')587                            sh_file.write(filewrite)588                            sh_file.close()589                            os.system('bsub < '+job_path_name)590                            pass591                        else:592                            os.system(filewrite)593def epistasis_2_locus_imbalanced_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):594    #Model 
parameters needed595    locus = 2596    heritability = [0.4]597    minorAF = [0.2]598    K = 0.3  #population prevelance599    #Define dataset parameters600    data_name = 'gametes_2way_epistasis_inbal'601    samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)602    numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]603    balance = [.6,.9]604    AF_Min = 0.01605    AF_Max = 0.5606    replicates = 30 #100607    #Make dataset folder608    if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):609        os.mkdir(output_path+'/'+archive_name+'/'+data_name)610    #Generate datasets and folders611    for n in numberofattributes:612        for s in samplesize:613            for h in heritability:614                for m in minorAF:615                    for b in balance:616                        #Calculate case and control counts617                        controlCount = int(float(s)*b)618                        caseCount = int(s-controlCount)619                        modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)620                        modelFile = model_dest+'/'+modelName+"_Models.txt"621                        genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)622                        #Create gametes run command623                        filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(caseCount)+' -w '+str(controlCount)+' -r '+str(replicates)+' -o '+str(genDataName)+'"'624                        if run_parallel:625                            job_ref = str(time.time())626                            job_path_name = job_dest+'/gametes_'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'627                            sh_file = open(job_path_name,'w')628   
                         sh_file.write('#!/bin/bash\n')629                            sh_file.write('#BSUB -q i2c2_normal'+'\n')630                            sh_file.write('#BSUB -J '+job_ref+'\n')631                            sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')632                            sh_file.write('#BSUB -M 15GB'+'\n')633                            sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')634                            sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')635                            sh_file.write(filewrite)636                            sh_file.close()637                            os.system('bsub < '+job_path_name)638                            pass639                        else:640                            os.system(filewrite)641def epistasis_2_locus_quantitative_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):642    #Model parameters needed643    locus = 2644    heritability = [0.4]645    minorAF = [0.2]646    K = 0.3  #population prevelance647    #Define dataset parameters648    data_name = 'gametes_2way_epistasis_quant'649    samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)650    numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]651    standardDev = [0.2,0.5,0.8]652    AF_Min = 0.01653    AF_Max = 0.5654    replicates = 30 #100655    #Make dataset folder656    if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):657        os.mkdir(output_path+'/'+archive_name+'/'+data_name)658    #Generate datasets and folders659    for n in numberofattributes:660        for s in samplesize:661            for h in heritability:662                for m in minorAF:663                    for d in standardDev:664               
         modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)665                        modelFile = model_dest+'/'+modelName+"_Models.txt"666                        genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)667                        #Create gametes run command668                        filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-c -d '+ str(d) + ' -t '+ str(s) + ' -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -r '+str(replicates)+' -o '+str(genDataName)+'"'669                        if run_parallel:670                            job_ref = str(time.time())671                            job_path_name = job_dest+'/gametes_'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'672                            sh_file = open(job_path_name,'w')673                            sh_file.write('#!/bin/bash\n')674                            sh_file.write('#BSUB -q i2c2_normal'+'\n')675                            sh_file.write('#BSUB -J '+job_ref+'\n')676                            sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')677                            sh_file.write('#BSUB -M 15GB'+'\n')678                            sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')679                            sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')680                            sh_file.write(filewrite)681                            sh_file.close()682                            os.system('bsub < '+job_path_name)683                            pass684                        else:685                            os.system(filewrite)686def 
def epistasis_2_locus_numfeatures_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):
    """Generate 2-locus epistasis datasets for several feature counts.

    For every combination of attribute count, sample size, heritability and
    minor allele frequency configured below, a GAMETES command line is built
    and either submitted as a cluster job via ``bsub`` or run directly.

    Parameters:
        output_path: root output directory; datasets are written under
            ``output_path/archive_name/gametes_2way_epistasis_numfeat``.
        archive_name: name of the archive folder inside ``output_path``.
        model_dest: directory holding the ``*_Models.txt`` model files that
            are passed to GAMETES with ``-i``.
        job_dest: directory in which the generated ``*_run.sh`` job scripts
            are written (only used when jobs are submitted).
        log_dest: directory named in the ``#BSUB -o`` / ``#BSUB -e`` lines.
        run_parallel: whether to submit jobs with ``bsub`` instead of running
            the command locally. Accepts a bool or the string form produced
            by the command line parser ("True"/"False", case-insensitive).
        this_file_path: directory containing ``gametes_2.2_dev.jar``.

    Returns:
        None. The function only creates folders/files and launches commands.
    """
    #Model parameters needed (they select which model file is read)
    locus = 2
    heritability = [0.4]
    minorAF = [0.2]
    K = 0.3  #population prevalence
    #Define dataset parameters
    data_name = 'gametes_2way_epistasis_numfeat'
    samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)
    numberofattributes = [1000,10000,100000] # [20, 100, 1000, 10000, 100000] #[200, 100]
    AF_Min = 0.01
    AF_Max = 0.5
    replicates = 30 #100

    # The flag reaches this function as text when it comes from the command
    # line, and any non-empty string (including "False") is truthy. Interpret
    # the text explicitly so that "False" really selects the local branch.
    if isinstance(run_parallel, str):
        submit_jobs = run_parallel.strip().lower() not in ('', 'false', 'f', '0', 'no', 'n')
    else:
        submit_jobs = bool(run_parallel)

    #Make dataset folder (parents included, tolerate an existing folder)
    data_dir = output_path+'/'+archive_name+'/'+data_name
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    #Generate datasets and folders
    for n in numberofattributes:
        for s in samplesize:
            half = int(s/2)  # balanced design: same count passed to -s and -w
            for h in heritability:
                for m in minorAF:
                    modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)
                    modelFile = model_dest+'/'+modelName+"_Models.txt"
                    genDataName = data_dir+'/'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)
                    #Create gametes run command
                    filewrite = ('java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile
                                 +' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)
                                 +' -s '+str(half)+' -w '+str(half)
                                 +' -r '+str(replicates)+' -o '+str(genDataName)+'"')
                    if not submit_jobs:
                        os.system(filewrite)
                        continue
                    # The timestamp doubles as job name and file-name suffix.
                    job_ref = str(time.time())
                    job_stem = 'gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref
                    job_path_name = job_dest+'/'+job_stem+'_run.sh'
                    # Context manager guarantees the script is flushed and
                    # closed before it is handed to bsub, even on error.
                    with open(job_path_name,'w') as sh_file:
                        sh_file.write('#!/bin/bash\n')
                        sh_file.write('#BSUB -q i2c2_normal'+'\n')
                        sh_file.write('#BSUB -J '+job_ref+'\n')
                        sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')
                        sh_file.write('#BSUB -M 15GB'+'\n')
                        sh_file.write('#BSUB -o ' + log_dest+'/'+job_stem+'.o\n')
                        sh_file.write('#BSUB -e ' + log_dest+'/'+job_stem+'.e\n')
                        sh_file.write(filewrite)
                    os.system('bsub < '+job_path_name)
######################################
if __name__ == '__main__':
    ...  # entry-point body is elided ("...") in this listing; the next listing is main.py
Source:main.py  
...285                              (["reference", "fasta"], ["reference", "aligner"], ["files"])),286                        samples, config, dirs, "multicore",287                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:288            with profile.report("alignment preparation", dirs):289                samples = run_parallel("prep_align_inputs", samples)290                samples = disambiguate.split(samples)291            with profile.report("alignment", dirs):292                samples = run_parallel("process_alignment", samples)293                samples = alignprep.merge_split_alignments(samples, run_parallel)294                samples = disambiguate.resolve(samples, run_parallel)295            with profile.report("callable regions", dirs):296                samples = run_parallel("postprocess_alignment", samples)297                samples = run_parallel("combine_sample_regions", [samples])298                samples = region.clean_sample_data(samples)299            with profile.report("coverage", dirs):300                samples = coverage.summarize_samples(samples, run_parallel)301        ## Variant calling on sub-regions of the input file (full cluster)302        with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),303                        samples, config, dirs, "full",304                        multiplier=region.get_max_counts(samples), max_multicore=1) as run_parallel:305            with profile.report("alignment post-processing", dirs):306                samples = region.parallel_prep_region(samples, run_parallel)307            with profile.report("variant calling", dirs):308                samples = genotype.parallel_variantcall_region(samples, run_parallel)309        ## Finalize variants (per-sample cluster)310        with prun.start(_wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),311                        samples, config, dirs, "persample") as run_parallel:312            with 
profile.report("variant post-processing", dirs):313                samples = run_parallel("postprocess_variants", samples)314                samples = run_parallel("split_variants_by_sample", samples)315            with profile.report("validation", dirs):316                samples = run_parallel("compare_to_rm", samples)317                samples = genotype.combine_multiple_callers(samples)318        ## Finalizing BAMs and population databases, handle multicore computation319        with prun.start(_wres(parallel, ["gemini", "samtools", "fastqc", "bamtools", "bcbio_variation",320                                         "bcbio-variation-recall"]),321                        samples, config, dirs, "multicore2") as run_parallel:322            with profile.report("prepped BAM merging", dirs):323                samples = region.delayed_bamprep_merge(samples, run_parallel)324            with profile.report("ensemble calling", dirs):325                samples = ensemble.combine_calls_parallel(samples, run_parallel)326            with profile.report("validation summary", dirs):327                samples = validate.summarize_grading(samples)328            with profile.report("structural variation", dirs):329                samples = structural.run(samples, run_parallel)330            with profile.report("population database", dirs):331                samples = population.prep_db_parallel(samples, run_parallel)332            with profile.report("quality control", dirs):333                samples = qcsummary.generate_parallel(samples, run_parallel)334            with profile.report("archive", dirs):335                samples = archive.compress(samples, run_parallel)336        logger.info("Timing: finished")337        return samples338def _debug_samples(i, samples):339    print "---", i, len(samples)340    for sample in (x[0] for x in samples):341        print "  ", sample["description"], sample.get("region"), \342            utils.get_in(sample, ("config", "algorithm", 
"variantcaller")), \343            [x.get("variantcaller") for x in sample.get("variants", [])], \344            sample.get("work_bam")345class SNPCallingPipeline(Variant2Pipeline):346    """Back compatible: old name for variant analysis.347    """348    name = "SNP calling"349class VariantPipeline(Variant2Pipeline):350    """Back compatibility; old name351    """352    name = "variant"353class StandardPipeline(AbstractPipeline):354    """Minimal pipeline with alignment and QC.355    """356    name = "Standard"357    @classmethod358    def run(self, config, config_file, parallel, dirs, lane_items):359        ## Alignment and preparation requiring the entire input file (multicore cluster)360        with prun.start(_wres(parallel, ["aligner"]),361                        lane_items, config, dirs, "multicore") as run_parallel:362            with profile.report("alignment", dirs):363                samples = run_parallel("process_alignment", lane_items)364            with profile.report("callable regions", dirs):365                samples = run_parallel("postprocess_alignment", samples)366                samples = run_parallel("combine_sample_regions", [samples])367                samples = region.clean_sample_data(samples)368        ## Quality control369        with prun.start(_wres(parallel, ["fastqc", "bamtools", "samtools"]),370                        samples, config, dirs, "multicore2") as run_parallel:371            with profile.report("quality control", dirs):372                samples = qcsummary.generate_parallel(samples, run_parallel)373        logger.info("Timing: finished")374        return samples375class MinimalPipeline(StandardPipeline):376    name = "Minimal"377class RnaseqPipeline(AbstractPipeline):378    name = "RNA-seq"379    @classmethod380    def run(self, config, config_file, parallel, dirs, samples):381        with prun.start(_wres(parallel, ["picard", "AlienTrimmer"]),382                        samples, config, dirs, "trimming") as 
run_parallel:383            with profile.report("adapter trimming", dirs):384                samples = run_parallel("process_lane", samples)385                samples = run_parallel("trim_lane", samples)386        with prun.start(_wres(parallel, ["aligner"],387                              ensure_mem={"tophat": 8, "tophat2": 8, "star": 30}),388                        samples, config, dirs, "multicore",389                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:390            with profile.report("alignment", dirs):391                samples = disambiguate.split(samples)392                samples = run_parallel("process_alignment", samples)393        with prun.start(_wres(parallel, ["samtools", "cufflinks"]),394                        samples, config, dirs, "rnaseqcount") as run_parallel:395            with profile.report("disambiguation", dirs):396                samples = disambiguate.resolve(samples, run_parallel)397            with profile.report("estimate expression", dirs):398                samples = rnaseq.estimate_expression(samples, run_parallel)399        combined = combine_count_files([x[0].get("count_file") for x in samples])400        gtf_file = utils.get_in(samples[0][0], ('genome_resources', 'rnaseq',401                                                'transcripts'), None)402        annotated = annotate_combined_count_file(combined, gtf_file)403        for x in samples:404            x[0]["combined_counts"] = combined405            if annotated:406                x[0]["annotated_combined_counts"] = annotated407        with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc"]),408                        samples, config, dirs, "persample") as run_parallel:409            with profile.report("quality control", dirs):410                samples = qcsummary.generate_parallel(samples, run_parallel)411        logger.info("Timing: finished")412        return samples413class ChipseqPipeline(AbstractPipeline):414    name = 
"chip-seq"415    @classmethod416    def run(self, config, config_file, parallel, dirs, samples):417        with prun.start(_wres(parallel, ["aligner", "picard"]),418                        samples, config, dirs, "multicore",419                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:420            samples = run_parallel("process_lane", samples)421            samples = run_parallel("trim_lane", samples)422            samples = disambiguate.split(samples)423            samples = run_parallel("process_alignment", samples)424        with prun.start(_wres(parallel, ["picard", "fastqc"]),425                        samples, config, dirs, "persample") as run_parallel:426            samples = run_parallel("clean_chipseq_alignment", samples)427            samples = qcsummary.generate_parallel(samples, run_parallel)428        return samples429def _get_pipeline(item):430    from bcbio.log import logger431    SUPPORTED_PIPELINES = {x.name.lower(): x for x in432                           utils.itersubclasses(AbstractPipeline)}433    analysis_type = item.get("analysis", "").lower()434    if analysis_type not in SUPPORTED_PIPELINES:435        logger.error("Cannot determine which type of analysis to run, "436                      "set in the run_info under details.")437        sys.exit(1)438    else:439        return SUPPORTED_PIPELINES[analysis_type]440def _pair_lanes_with_pipelines(lane_items):...test_run_parallel.py
Source:test_run_parallel.py  
...12        def funtion_error():13            raise ValueError('Error in values!')14        for use_multiprocess in (True, False):15            try:16                val1, val2 = run_parallel([17                    function_no_error,18                    funtion_error19                ], multiprocess=use_multiprocess)20            except Exception, e:21                self.assertIsInstance(e, ErrorInProcessException)22                self.assertEqual(1, len(e.errors))23    def test_return_value_order(self):24        """Tests that return values are returned in the order the functions are passed to run_parallel"""25        def return_first():26            time.sleep(0.0)27            return 128        def return_second():29            time.sleep(0.1)30            return 231        def return_third():32            time.sleep(0.2)33            return 334        def return_fourth():35            time.sleep(0.3)36            return 437        for use_multiprocess in (True, False):38            val1, val2, val3, val4 = run_parallel([39                return_second,40                return_first,41                return_third,42                return_fourth], multiprocess=use_multiprocess)43            self.assertEqual(val1, 2)44            self.assertEqual(val2, 1)45            self.assertEqual(val3, 3)...run_parallel_test.py
Source:run_parallel_test.py  
...15    """Test that the result is returned in the correct order."""16    script = MyTestResultOrder(tmpdir)17    data = 1018    params = [(data, p) for p in range(10)]19    results = run_parallel(script.experiment, params)20    assert results == [script.experiment(*p) for p in params]21class MyTestMultipleParams(util.TestScript):22    def experiment(self, data, exponent, bias):23        return data**exponent + bias24def test_multiple_params(tmpdir):25    """Test run_parallel when the experiment has multiple parameters."""26    script = MyTestMultipleParams(tmpdir)27    data = 1028    params = [(data, p, b) for p, b in zip(range(10), range(10, 20))]29    results = run_parallel(script.experiment, params)...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
