How to use the extract_data method in Airtest

Best Python code snippet using Airtest

repository.py

Source: repository.py (GitHub)


1"""Interface to the CKAN data repository, for uploading bundle records and data extracts. 2Copyright (c) 2013 Clarinova. This file is licensed under the terms of the3Revised BSD License, included in this distribution as LICENSE.txt4"""5from databundles.dbexceptions import ConfigurationError6import petl.fluent as petlf7class Repository(object):8 '''Interface to the CKAN data repository, for uploading bundle records and 9 data extracts. classdocs10 '''11 def __init__(self, bundle, repo_name='default'):12 '''Create a new repository interface13 ''' 14 import databundles.client.ckan15 import time, datetime16 self.bundle = bundle 17 self.extracts = self.bundle.config.group('extracts')18 self.partitions = self.bundle.partitions 19 self.repo_name = repo_name20 self._api = None21 22 @property23 def api(self):24 if not self._api:25 self.set_api()26 27 return self._api28 def set_api(self): 29 import databundles.client.ckan30 repo_group = self.bundle.config.group('repository')31 32 if not repo_group.get(self.repo_name): 33 raise ConfigurationError("'repository' group in configure either nonexistent"+34 " or missing {} sub-group ".format(self.repo_name))35 36 repo_config = repo_group.get(self.repo_name)37 38 self._api = databundles.client.ckan.Ckan( repo_config.url, repo_config.key) 39 40 return self.api41 42 43 def _validate_for_expr(self, astr,debug=False):44 """Check that an expression is save to evaluate"""45 import os46 import ast47 try: tree=ast.parse(astr)48 except SyntaxError: raise ValueError(49 "Could not parse code expression : \"{}\" ".format(astr)+50 " ")51 for node in ast.walk(tree):52 if isinstance(node,(ast.Module,53 ast.Expr,54 ast.Dict,55 ast.Str,56 ast.Attribute,57 ast.Num,58 ast.Name,59 ast.Load,60 ast.BinOp,61 ast.Compare,62 ast.Eq,63 ast.Import,64 ast.alias,65 ast.Call66 )): 67 continue68 if (isinstance(node,ast.Call)69 and isinstance(node.func, ast.Attribute)70 and node.func.attr == 'datetime'): 71 continue72 if debug:73 attrs=[attr for attr in dir(node) if not attr.startswith('__')]74 print(node)75 for attrname in attrs:76 print(' {k} ==> {v}'.format(k=attrname,v=getattr(node,attrname)))77 raise ValueError("Bad node {} in {}. 
This code is not allowed to execute".format(node,astr))78 return True79 def _do_extract(self, extract_data, force=False):80 import os # For the eval @UnusedImport81 82 done_if = extract_data.get('done_if',False)83 84 if not force and done_if and self._validate_for_expr(done_if, True):85 if eval(done_if): 86 self.bundle.log("For extract {}, done_if ( {} ) evaluated true"87 .format(extract_data['_name'], done_if)) 88 return extract_data['path']89 if extract_data.get('function',False):90 file_ = self._do_function_extract(extract_data)91 elif extract_data.get('query',False):92 file_ = self._do_query_extract(extract_data)93 else:94 from databundles.dbexceptions import ConfigurationError95 raise ConfigurationError("Bad Extract config: {}".format(extract_data))96 return file_97 98 def _do_function_extract(self, extract_data):99 '''Run a function on the build that produces a file to upload'''100 import os.path101 102 f_name = extract_data['function']103 104 f = getattr(self.bundle, f_name)105 106 file_ = f(extract_data) 107 return file_108 109 def _do_query_extract(self, extract_data):110 """Extract a CSV file and upload it to CKAN"""111 import tempfile112 import uuid113 import os114 p = extract_data['_partition'] # Set in _make_partition_dict115 116 file_name = extract_data.get('name', None)117 118 if file_name:119 file_ = self.bundle.filesystem.path('extracts', file_name)120 else:121 file_ = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()) )122 123 self.bundle.log("Extracting {} to {}".format(extract_data['title'],file_))124 petlf.fromsqlite3(p.database.path, extract_data['query'] ).tocsv(file_) #@UndefinedVariable125 126 return file_ 127 128 def _send(self, package, extract_data, file_):129 import os130 import mimetypes131 132 _, ext = os.path.splitext(file_)133 mimetypes.init()134 content_type = mimetypes.types_map.get(ext,None) #@UndefinedVariable135 136 try:137 _,format = content_type.split('/')138 except:139 format = None140 141 name = extract_data.get('name', os.path.basename(file_))142 143 144 r = self.api.add_file_resource(package, file_, 145 name=name,146 description=extract_data['description'],147 content_type = content_type, 148 format=format149 )150 151 152 return r153 154 def _make_partition_dict(self, p):155 '''Return a dict that includes the fields from the extract expanded for156 the values of each and the partition'''157 158 qd = {159 'p_id' : p.identity.id_,160 'p_name' : p.identity.name,161 }162 163 try:164 # Bundles don't have these 165 qd_part = {166 'p_table' : p.identity.table,167 'p_space' : p.identity.space,168 'p_time' : p.identity.time,169 'p_grain' : p.identity.grain, 170 }171 except:172 qd_part = {'p_table' : '','p_space' : '', 'p_time' :'','p_grain' : ''}173 174 qd = dict(qd.items()+ qd_part.items())175 qd['_partition'] = p176 return qd177 178 def _expand_each(self, each):179 '''Generate a set of dicts from the cross product of each of the180 arrays of 'each' group'''181 182 # Normalize the each group, particular for the case where there is only183 # one dimension184 185 if not isinstance(each, list):186 raise ConfigurationError("The 'each' key must have a list. Got a {} ".format(type(each)))187 188 elif len(each) == 0:189 each = [[{}]]190 if not isinstance(each[0], list):191 each = [each]192 193 # Now the top level arrays of each are dimensions, and we can do a 194 # multi dimensional iteration over them. 
195 # This is essentially a cross-product, where out <- out X dim(i)196 out = []197 for i,dim in enumerate(each):198 if i == 0:199 out = dim200 else:201 o2 = []202 for i in dim:203 for j in out:204 o2.append(dict(i.items()+j.items()))205 out = o2206 return out207 208 209 def _expand_partitions(self, partition_name='any', for_=None):210 '''Generate a list of partitions to apply the extract process to. '''211 if partition_name == 'any':212 partitions = [p for p in self.partitions]213 partitions = [self.bundle] + partitions214 else:215 partition = self.partitions.get(partition_name)216 partitions = [partition]217 out = []218 219 if not for_:220 for_ = 'True'221 222 for partition in partitions:223 224 try:225 self.bundle.log("Testing: {} ".format(partition.identity.name))226 if self._validate_for_expr(for_, True):227 if eval(for_): 228 out.append(partition)229 except Exception as e:230 self.bundle.error("Error in evaluting for '{}' : {} ".format(for_, e))231 232 return out233 234 def _sub(self, data):235 236 if data.get('aa', False):237 from databundles.geo.analysisarea import get_analysis_area238 aa = get_analysis_area(self.bundle.library, **data['aa']) 239 240 aa_d = dict(aa.__dict__)241 aa_d['aa_name'] = aa_d['name']242 del aa_d['name']243 244 data = dict(data.items() + aa_d.items())245 data['query'] = data.get('query','').format(**data)246 data['title'] = data.get('title','').format(**data)247 data['description'] = data.get('description','').format(**data)248 data['name'] = data.get('name','').format(**data)249 data['path'] = self.bundle.filesystem.path('extracts',format(data['name']))250 data['done_if'] = data.get('done_if','').format(**data)251 252 return data253 254 def dep_tree(self, root):255 """Return the tree of dependencies rooted in the given nod name, 256 excluding all other nodes"""257 258 graph = {}259 for key,extract in self.extracts.items():260 graph[key] = set(extract.get('depends',[]))261 262 def _recurse(node):263 l = set([node])264 for n in graph[node]:265 l = l | _recurse(n)266 267 return l268 269 return _recurse(root)270 271 272 def generate_extracts(self, root=None):273 """Generate dicts that have the data for an extract, along with the 274 partition, query, title and description275 276 :param root: The name of an extract group to use as the root of277 the dependency tree278 :type root: string279 280 If `root` is specified, it is a name of an extract group from the configuration,281 and the only extracts performed will be the named extracts and any of its282 dependencies. 283 284 """285 import collections286 from databundles.util import toposort287 288 ext_config = self.extracts289 # Order the extracts to satisfy dependencies. 290 graph = {}291 for key,extract in ext_config.items():292 graph[key] = set(extract.get('depends',[]))293 294 if graph:295 exec_list = []296 for group in toposort(graph):297 exec_list.extend(group)298 else:299 exec_list = ext_config.keys()300 301 if root:302 deps = self.dep_tree(root)303 exec_list = [ n for n in exec_list if n in deps]304 305 306 # now can iterate over the list. 
307 for key in exec_list:308 extract = ext_config[key]309 extract['_name'] = key310 for_ = extract.get('for', "'True'")311 function = extract.get('function', False)312 each = extract.get('each', [])313 p_id = extract.get('partition', False)314 eaches = self._expand_each(each)315 316 # This part is a awful hack and should be refactored317 if function:318 for data in eaches: 319 yield self._sub(dict(extract.items() + data.items()))320 elif p_id: 321 partitions = self._expand_partitions(p_id, for_)322 323 for partition in partitions:324 p_dict = self._make_partition_dict(partition)325 for data in eaches: 326 yield self._sub(dict(p_dict.items()+extract.items() + 327 data.items() ))328 329 def store_document(self, package, config):330 import re, string331 id = re.sub('[\W_]+', '-',config['title'])332 333 r = self.api.add_url_resource(package, 334 config['url'], 335 config['title'],336 description=config['description'])337 338 return r339 340 def extract(self, root=None, force=False):341 import os342 for extract_data in self.generate_extracts(root=root):343 file_ = self._do_extract(extract_data, force=force)344 if file_ is True:345 #self.bundle.log("Extract {} marked as done".format(extract_data['_name']))346 pass347 elif file_ and os.path.exists(file_):348 self.bundle.log("Extracted: {}".format(file_))349 else:350 self.bundle.error("Extracted file {} does not exist".format(file_))351 352 return True353 354 def submit(self, root=None, force=False, repo=None): 355 """Create a dataset for the bundle, then add a resource for each of the356 extracts listed in the bundle.yaml file"""357 358 if repo:359 self.repo_name = repo360 self.set_api()361 362 self.bundle.update_configuration()363 from os.path import basename364 365 ckb = self.api.update_or_new_bundle_extract(self.bundle)366 367 sent = set()368 369 # Clear out existing resources. 370 ckb['resources'] = [] 371 self.api.put_package(ckb)372 373 for doc in self.bundle.config.group('about').get('documents',[]):374 self.store_document(ckb, doc)375 for extract_data in self.generate_extracts(root=root):376 file_ = self._do_extract(extract_data, force=force)377 if file_ not in sent:378 r = self._send(ckb, extract_data,file_)379 sent.add(file_)380 url = r['ckan_url']381 self.bundle.log("Submitted {} to {}".format(basename(file_), url))382 else:383 self.bundle.log("Already processed {}, not sending.".format(basename(file_)))384 ...
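
In repository.py, extract_data is not an Airtest call but a plain configuration dict: generate_extracts() yields one per extract defined in the bundle configuration, and _do_extract() dispatches on its keys ('function' vs. 'query'). Below is a minimal sketch of that shape and dispatch, not part of the source; the keys mirror the snippet above, while the concrete values and the standalone do_extract() helper are hypothetical.

def do_extract(extract_data):
    """Dispatch on the keys of an extract configuration dict, as _do_extract() does."""
    if extract_data.get('function'):
        return "would call bundle method %r" % extract_data['function']
    elif extract_data.get('query'):
        return "would run SQL %r and write a CSV" % extract_data['query']
    raise ValueError("Bad extract config: %r" % extract_data)

# Hypothetical extract_data dict, shaped like the ones generate_extracts() yields.
extract_data = {
    '_name': 'population_csv',               # key of the extract in the config group
    'title': 'Population extract',
    'description': 'Population counts per tract',
    'query': 'SELECT * FROM population',     # presence of 'query' selects the SQL path
    'name': 'population.csv',
}

print(do_extract(extract_data))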


ocr_badao.py

Source: ocr_badao.py (GitHub)


1"""2Usage:3python get_reference_embedded.py \4 --img_lst ./img_lst.txt \5 --output_dir ./output6 --verbose True7"""8import re9import os10import cv211import glob212import argparse13import pytesseract14import numpy as np15from tqdm import tqdm16import re17from constrains import check_embedded18def get_reference_embbeded(path, output_dir, verbose=False):19 img = cv2.imread(path)20 # img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)21 _, bw_img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)22 # Extract information from the image23 extract_data = pytesseract.image_to_data(bw_img, output_type=pytesseract.Output.DICT)24 # Get the object recognized is page_num, block_num, par_num, line_num, word_num25 # page_num_indices = np.where(np.array(extract_data['level'])==1)[0]26 # block_num_indices = np.where(np.array(extract_data['level'])==2)[0]27 # par_num_indices = np.where(np.array(extract_data['level'])==3)[0]28 # line_num_indices = np.where(np.array(extract_data['level'])==4)[0]29 word_num_indices = np.where(np.array(extract_data['level'])==5)[0]30 # Get the object have specific patterns31 # Case 132 filter_word_num_indices_case1 = [i for i in word_num_indices33 if re.match("[A-Z0-9]\-[A-Za-z]{4,10}", extract_data['text'][i])]34 # # Case 235 filter_word_num_indices_case2 = [i for i in word_num_indices36 if re.match("\([A-Z]\.[0-9]", extract_data['text'][i])]37 bw_img = bw_img[:, :, 0]38 filter_word_num_indices_case2 = [39 i for i in filter_word_num_indices_case240 if check_embedded(bin_img=bw_img,41 bbox=(extract_data['left'][i], 42 extract_data['top'][i], 43 extract_data['width'][i], 44 extract_data['height'][i]45 )46 )47 ]48 # # Case 349 filter_word_num_indices_case3 = [i for i in word_num_indices50 if re.match("[\-+]{0,1}[0-9]{1,2}[+\-\*/><=][0-9]{1,2}", extract_data['text'][i])]51 52 # # Case 453 filter_word_num_indices_case4 = [i for i in word_num_indices54 if re.match("exp\(|dim\(|rank\(|cos\(|sin\(|tan\(|cotan\(|Ker\(|Im\(|LSM\(", extract_data['text'][i])]55 # Case 556 filter_word_num_indices_case5 = [i for i in word_num_indices57 if re.match("[A-Z]+\(.", extract_data['text'][i])]58 if len(filter_word_num_indices_case1) + len(filter_word_num_indices_case2) + len(filter_word_num_indices_case3) + len(filter_word_num_indices_case4) + len(filter_word_num_indices_case5) == 0:59 return60 61 # Write detection result to file62 with open(os.path.join(output_dir, 'case1.txt'), 'a+') as f:63 for i in filter_word_num_indices_case1:64 line = "{}, {}, {}, {}, {}, {}, {}".format(65 os.path.basename(path),66 extract_data['left'][i], 67 extract_data['top'][i], 68 extract_data['left'][i]+extract_data['width'][i], 69 extract_data['top'][i]+extract_data['height'][i],70 extract_data['conf'][i]/100, 071 )72 f.write(line + '\n')73 with open(os.path.join(output_dir, 'case2.txt'), 'a+') as f:74 for i in filter_word_num_indices_case2:75 line = "{}, {}, {}, {}, {}, {}, {}".format(76 os.path.basename(path),77 extract_data['left'][i], 78 extract_data['top'][i], 79 extract_data['left'][i]+extract_data['width'][i], 80 extract_data['top'][i]+extract_data['height'][i],81 extract_data['conf'][i]/100, 082 )83 f.write(line + '\n')84 with open(os.path.join(output_dir, 'case3.txt'), 'a+') as f:85 for i in filter_word_num_indices_case3:86 line = "{}, {}, {}, {}, {}, {}, {}".format(87 os.path.basename(path),88 extract_data['left'][i], 89 extract_data['top'][i], 90 extract_data['left'][i]+extract_data['width'][i], 91 extract_data['top'][i]+extract_data['height'][i],92 extract_data['conf'][i]/100, 093 )94 f.write(line + '\n')95 
with open(os.path.join(output_dir, 'case4.txt'), 'a+') as f:96 for i in filter_word_num_indices_case4:97 line = "{}, {}, {}, {}, {}, {}, {}".format(98 os.path.basename(path),99 extract_data['left'][i], 100 extract_data['top'][i], 101 extract_data['left'][i]+extract_data['width'][i], 102 extract_data['top'][i]+extract_data['height'][i],103 extract_data['conf'][i]/100, 0104 )105 f.write(line + '\n')106 with open(os.path.join(output_dir, 'case5.txt'), 'a+') as f:107 for i in filter_word_num_indices_case5:108 line = "{}, {}, {}, {}, {}, {}, {}".format(109 os.path.basename(path),110 extract_data['left'][i], 111 extract_data['top'][i], 112 extract_data['left'][i]+extract_data['width'][i], 113 extract_data['top'][i]+extract_data['height'][i],114 extract_data['conf'][i]/100, 0115 )116 f.write(line + '\n')117 if verbose:118 # Draw case 1119 for i in filter_word_num_indices_case1:120 (x, y, w, h) = (extract_data['left'][i], 121 extract_data['top'][i], 122 extract_data['width'][i], 123 extract_data['height'][i])124 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)125 cv2.putText(img, '1', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)126 # Draw case 2127 for i in filter_word_num_indices_case2:128 (x, y, w, h) = (extract_data['left'][i], 129 extract_data['top'][i], 130 extract_data['width'][i], 131 extract_data['height'][i])132 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 127, 255), 2)133 cv2.putText(img, '2', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 127, 255), 2)134 # Draw case 3135 for i in filter_word_num_indices_case3:136 (x, y, w, h) = (extract_data['left'][i], 137 extract_data['top'][i], 138 extract_data['width'][i], 139 extract_data['height'][i])140 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 255), 2)141 cv2.putText(img, '3', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)142 # Draw case 4143 for i in filter_word_num_indices_case4:144 (x, y, w, h) = (extract_data['left'][i], 145 extract_data['top'][i], 146 extract_data['width'][i], 147 extract_data['height'][i])148 img = cv2.rectangle(img, (x, y), (x + w, y + h), (127, 0, 127), 2)149 cv2.putText(img, '4', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (127, 0, 127), 2)150 # Draw case 5151 for i in filter_word_num_indices_case5:152 (x, y, w, h) = (extract_data['left'][i], 153 extract_data['top'][i], 154 extract_data['width'][i], 155 extract_data['height'][i])156 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)157 cv2.putText(img, '5', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)158 save_bin_img_file = os.path.join(output_dir, 'bin_images', os.path.basename(path))159 save_img_file = os.path.join(output_dir, 'images', os.path.basename(path))160 #save_text_file = os.path.join(output_dir, 'text', os.path.basename(path).split(".")[0]+'.txt')161 162 cv2.imwrite(save_bin_img_file, bw_img)163 cv2.imwrite(save_img_file, img)164 # with open(save_text_file, 'w') as f:165 # for i in filter_word_num_indices_case1:166 # f.write(extract_data['text'][i]+'\n')167 # for i in filter_word_num_indices_case2:168 # f.write(extract_data['text'][i]+'\n')169 # for i in filter_word_num_indices_case3:170 # f.write(extract_data['text'][i]+'\n')171 # for i in filter_word_num_indices_case4:172 # f.write(extract_data['text'][i]+'\n')173 # for i in filter_word_num_indices_case5:174 # f.write(extract_data['text'][i]+'\n')175 # pass176def main(args):177 178 # Check invalid input or not179 if not os.path.exists(args['img_lst']):180 print("INVALID IMAGE LIST FILE !")181 exit(0)182 if not 
os.path.exists(args['output_dir']):183 print("Creating output directory: {}".format(args['output_dir']))184 try:185 os.mkdir(args['output_dir'])186 except:187 print("INVALID OUTPUT DIRECTORY !")188 exit(0)189 os.mkdir(os.path.join(args['output_dir'], 'images'))190 os.mkdir(os.path.join(args['output_dir'], 'bin_images'))191 os.mkdir(os.path.join(args['output_dir'], 'text'))192 if not os.path.exists(os.path.join(args['output_dir'], 'bin_images')):193 os.mkdir(os.path.join(args['output_dir'], 'bin_images'))194 if not os.path.exists(os.path.join(args['output_dir'], 'images')):195 os.mkdir(os.path.join(args['output_dir'], 'images'))196 if not os.path.exists(os.path.join(args['output_dir'], 'text')):197 os.mkdir(os.path.join(args['output_dir'], 'text'))198 # Get the existing image path from file199 with open(args['img_lst'], 'r') as f:200 img_paths = [line.rstrip() for line in f.readlines() if os.path.exists(line.rstrip())]201 # Extract information from each image202 for i, img_path in enumerate(img_paths):203 print("{:6}/{:6} Extracting {}".format(204 str(i).zfill(6), str(len(img_paths)).zfill(6), os.path.basename(img_path)))205 get_reference_embbeded(path=img_path, output_dir=args['output_dir'], verbose=args['verbose'])206if __name__ == '__main__':207 parser = argparse.ArgumentParser(description='Extract information from image using Tesseract OCR engine.')208 parser.add_argument('--img_lst', required=True,209 help='an integer for the accumulator')210 parser.add_argument('--output_dir', required=True,211 help='The output directory to store extracted information')212 parser.add_argument('--verbose', action='store_true',213 help='Store image to disk wheter or not.')214 args = vars(parser.parse_args())215 print(args)...
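
In ocr_badao.py, extract_data is the dictionary returned by pytesseract.image_to_data() with output_type=pytesseract.Output.DICT: parallel lists such as 'level', 'text', 'left', 'top', 'width', 'height', and 'conf' that are indexed together, where level 5 entries are individual words. A minimal usage sketch of that pattern, assuming Tesseract and OpenCV are installed and 'page.png' is a stand-in input image:

import cv2
import pytesseract

# Hypothetical input; any document scan works.
img = cv2.imread('page.png')
_, bw_img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

# image_to_data with Output.DICT returns parallel lists indexed together.
extract_data = pytesseract.image_to_data(bw_img, output_type=pytesseract.Output.DICT)

# level == 5 rows are words; print each word with its bounding box and confidence.
for i, level in enumerate(extract_data['level']):
    if level == 5:
        x, y = extract_data['left'][i], extract_data['top'][i]
        w, h = extract_data['width'][i], extract_data['height'][i]
        print(extract_data['text'][i], (x, y, x + w, y + h), extract_data['conf'][i])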


__init__.py

Source: __init__.py (GitHub)


from .polygon import get_subset_area
from ..utils.plink import run_plink
import pandas as pd
import tempfile
import numpy as np
from numpy.random import randn


def filter_data(meta_data, bedfile, missing=0.001, plink="plink",
                exclude_loci=[],
                has_dataset=False, outfile='TMP_PLINK', per_ind_missing=1.,
                max_per_pop=5000):
    """filter_data
    filters bed file to only keep the individuals in meta_data, uses plink

    the data is read from bedfile, and written to the file in the TMP_PLINK file
    global variable

    Parameters
    ----------
    meta_data : pd.DataFrame
        pandas data frame with individuals to keep
    bedfile : path
        the bedfile to be filtered
    plink : path
        the plink executable to be used

    """
    include_name = '%s.incl' % outfile
    try:
        fam = pd.read_table("%s.fam" % bedfile, header=None,
                            skipinitialspace=True)
        fam.columns = ['FAM', 'sampleId', 'a', 'b', 'c', 'd']
    except ValueError:
        fam = pd.read_table("%s.fam" % bedfile, header=None,
                            skipinitialspace=True, sep=" ")
        fam.columns = ['FAM', 'sampleId', 'a', 'b', 'c', 'd']

    extract_data = meta_data.merge(fam, on='sampleId', how='inner')
    #5
    extract_data['random'] = randn(extract_data.shape[0])
    extract_data.index = extract_data.sampleId
    print("subsetting: ", extract_data.shape)
    largest = extract_data.groupby('popId')['random'].nlargest(max_per_pop)
    print(largest)
    #largest = extract_data.groupby('popId')['random'].nlargest(5)
    largest = pd.DataFrame(largest.index)
    print(largest)
    print("subsetting: ", largest.shape, largest.columns)
    extract_data = extract_data.merge(largest, on="sampleId", how="inner")
    print("subsetting: ", extract_data.shape, largest.shape)
    extract_data.to_csv(include_name, sep=' ',
                        columns=('FAM', 'sampleId'),
                        header=None, index=None)
    extract_data.drop('a', axis=1, inplace=True)
    extract_data.drop('b', axis=1, inplace=True)
    extract_data.drop('c', axis=1, inplace=True)
    extract_data.drop('d', axis=1, inplace=True)
    extract_data.drop('FAM', axis=1, inplace=True)
    extract_data.drop('POINTS', axis=1, inplace=True)
    print(extract_data.columns)
    #if has_dataset:
    #    meta_data = extract_data[['sampleId', 'POP', 'latitude', 'longitude', 'FAM',
    #                              'DATASET']]
    #else:
    #    meta_data = extract_data[['sampleId', 'POP', 'latitude', 'longitude', 'FAM']]
    extract_data = extract_data[pd.notnull(extract_data['latitude'])]
    extract_data = extract_data[pd.notnull(extract_data['longitude'])]

    flags = dict()
    flags['make-bed'] = ''
    #flags['allow-extra-chr'] = ''
    flags['bfile'] = bedfile
    flags['out'] = outfile
    flags['keep'] = include_name
    flags['indiv-sort'] = 'f %s' % include_name

    if exclude_loci != []:
        exclude_loci_file = tempfile.NamedTemporaryFile(delete=False, mode="w")
        for locus in exclude_loci:
            exclude_loci_file.write("%s\n" % locus)
        exclude_loci_file.close()
        flags['exclude'] = exclude_loci_file.name
        print("excluding loci %s " % exclude_loci_file.name)

    run_plink(plink, flags)
    flags['bfile'] = outfile
    # flags['geno'] = '%s' % (float(missing) * 2)
    flags['geno'] = '%s' % missing
    run_plink(plink, flags)
    flags['mind'] = '%s' % per_ind_missing
    del flags['geno']
    #flags['geno'] = '%s' % missing
    run_plink(plink, flags)

    # now re-read fam file to find retained guys
    print(extract_data.shape)
    fam = pd.read_table("%s.fam" % outfile, header=None,
                        skipinitialspace=True, sep=" ")
    fam.columns = ['FAM', 'sampleId', 'a', 'b', 'c', 'd']
    print(fam.shape)
    extract_data = extract_data.merge(fam, on='sampleId', how='inner')
    extract_data.drop('a', axis=1, inplace=True)
    extract_data.drop('b', axis=1, inplace=True)
    extract_data.drop('c', axis=1, inplace=True)
    extract_data.drop('d', axis=1, inplace=True)
    extract_data.drop('FAM', axis=1, inplace=True)
    print(extract_data.shape)
    ...
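
In this __init__.py, extract_data is a pandas DataFrame: the sample metadata merged with the plink .fam file, then thinned to at most max_per_pop rows per population by ranking a random column with groupby(...).nlargest(...). A self-contained sketch of that subsampling step with made-up data (column names follow the snippet; the isin() filter is a simplified stand-in for the DataFrame/merge round-trip used in the source):

import pandas as pd
import numpy as np

# Illustrative metadata: sample IDs with a population label.
extract_data = pd.DataFrame({
    'sampleId': ['s1', 's2', 's3', 's4', 's5'],
    'popId':    ['A',  'A',  'A',  'B',  'B'],
})
max_per_pop = 2

# Give every sample a random score, then keep the top max_per_pop per population.
extract_data['random'] = np.random.randn(extract_data.shape[0])
extract_data.index = extract_data.sampleId
largest = extract_data.groupby('popId')['random'].nlargest(max_per_pop)

# nlargest on the grouped Series returns a (popId, sampleId) MultiIndex;
# the retained sampleIds are its second level.
keep = largest.index.get_level_values('sampleId')
extract_data = extract_data[extract_data['sampleId'].isin(keep)].reset_index(drop=True)
print(extract_data)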


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, i.e. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run Airtest automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

