How to use the passing method in Robot Framework

Best Python code snippets using Robot Framework
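
The snippets below come from projects that pass values and mutable arguments between Python code and a test runner. As background for the Robot Framework angle, here is a minimal, hypothetical keyword library (the class and method names are illustrative and not taken from any of the snippets): Robot Framework can import a plain Python class as a library and passes keyword arguments straight to its methods, so mutable arguments follow normal Python pass-by-object-reference semantics.

# PassingDemo.py -- hypothetical Robot Framework keyword library (illustration only)
class PassingDemo:

    def square_numbers(self, numbers):
        """Square *numbers* in place; because Python passes object references,
        the caller's list is modified."""
        for i, n in enumerate(numbers):
            numbers[i] = int(n) * int(n)
        return numbers

A suite that imports this file as a library could call Square Numbers on a list variable; the same in-place mutation behaviour is what the pyjnius test further down exercises across the Python/Java boundary.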

genotype_filters.py

Source: genotype_filters.py (GitHub)

1"""2Filters on the result of boolean operations on multiple samples genotypes,3such as "all have the same genotype" or "all are homozygous".4"""5from django.conf import settings6from varapp.filters.apply_bitwise import c_apply_bitwise # from cython extension7from varapp.constants.filters import FILTER_CLASS_GENOTYPE8from varapp.constants.genotype import *9from varapp.data_models.samples import SamplesSelection10from varapp.data_models.variants import *11from varapp.filters.filters import Filter, FilterResult, FiltersCollection12from varapp.variants.genotypes_service import genotypes_service13from varapp.variants.variants_factory import set_source14import abc, itertools, multiprocessing as mp15import numpy as np16from functools import reduce17from operator import attrgetter, itemgetter, __and__18from time import time19AND = 'AND'20OR = 'OR'21DEBUG = True and settings.DEBUG22def merge_conditions_array(conds):23 """If there are multiple affected samples sharing the same parents,24 the conditions can be redundant. Simplify the conditions array so that25 there is at most one for each genotype/sample. If there are several constraints26 for the same genotype, check that they are compatible and take the strongest27 (lowest bit value).28 :param conds: an array of couples [sample_index, genotype_bit]29 :rtype: same as input30 """31 merged = []32 if not conds:33 return merged34 # Group by sample index, and get a single common bit for all conds on that sample35 conds.sort(key=itemgetter(0))36 for idx,group in itertools.groupby(conds, itemgetter(0)):37 genbits = [x[1] for x in group] # only the genotype bit38 common_bits = reduce(__and__, genbits)39 merged.append((idx, common_bits))40 return merged41class GenotypesFilter(Filter):42 """Defines a way to *apply* a filter on variants genotypes."""43 __metaclass__ = abc.ABCMeta44 filter_class = FILTER_CLASS_GENOTYPE45 need_groups = [] # The required group names in the samples selection for the filter to work.46 need_parents = 0 # Whether 0/1/2 parents are required for the filter to work47 def __init__(self, ss:SamplesSelection, val, name='genotype', op='=', db=None):48 super().__init__(name=name, op=op, val=val, ss=ss, db=db)49 self.nsamples = len(ss.active_idx)50 self.merge_op = AND51 self.shortcut = False # Flag: if True, don't filter anything52 # Need at least one active sample53 if len(self.ss.active_idx) == 0:54 self.shortcut = True55 # If parents are required, check that both are present for at least one of the affected samples56 mothers_aff = [ss.mother_idx_of(s) for s in ss.affected]57 fathers_aff = [ss.father_idx_of(s) for s in ss.affected]58 if self.need_parents == 2 and all(None in x for x in zip(mothers_aff, fathers_aff)):59 self.shortcut = True60 elif self.need_parents == 1 and all((x,y)==(None,None) for x,y in zip(mothers_aff, fathers_aff)):61 self.shortcut = True62 # If certain groups are required, check that they are present in the selection63 if any((x not in ss.groups.keys() or len(ss.groups[x]) == 0) for x in self.need_groups):64 self.shortcut = True65 # The compound case implements its own stuff, but otherwise do that:66 if self.val != GENOTYPE_COMPOUND:67 conditions_array = self.build_conditions_array()68 self.conditions_array = merge_conditions_array(conditions_array)69 if len(self.conditions_array) == 0:70 self.shortcut = True71 self.conditions_vector = self.build_conditions_vector(self.conditions_array)72 def build_conditions_array(self):73 """Construct a list of lists [sample_idx, BITCODE], one for each sample.74 Then a variant 
passes if in its decoded gts, there is BITCODE at position idx.75 Once only: it is proper to the filter (with the list of all possible samples,76 but no samples selection)."""77 raise NotImplementedError("No `build_conditions_array` method implemented.")78 def build_conditions_vector(self, conditions_array):79 """From a *conditions_array*, of elements [sample_idx, BITCODE],80 build a vector of size len(active_samples) with BITCODE at indices81 where a condition is given, and GENOTYPE_BIT_ANY elsewhere.82 :rtype: np.ndarray[uint8]83 """84 active_idx = self.ss.active_idx85 conds = GENOTYPE_BIT_ANY * np.ones(len(active_idx), dtype=np.uint8)86 shift = {idx:i for i,idx in enumerate(active_idx)}87 for idx,bit in conditions_array:88 conds[shift[idx]] = bit89 return conds90 def scan_genotypes(self, genotypes, sub_ids=None, db=None):91 """Pass through all genotypes and return only the indices of those that pass the filter.92 :param genotypes: np.ndarray[uint64, dim=2]93 :rtype: np.ndarray[uint64]"""94 if self.shortcut:95 return np.zeros(0)96 N = len(genotypes)97 if sub_ids is not None:98 variant_ids = sub_ids99 elif self.val == 'x_linked' and db:100 variant_ids = genotypes_service(db).chrX101 else:102 variant_ids = np.asarray(range(1,N+1), dtype=np.uint64)103 active_idx = np.asarray(self.ss.active_idx, dtype=np.uint16)104 conditions = self.conditions_vector105 is_and = self.merge_op == AND106 if len(conditions) == 0:107 passing = variant_ids108 else:109 passing = self.parallel_apply_bitwise(genotypes, variant_ids, conditions, active_idx, is_and)110 return passing111 @staticmethod112 def parallel_apply_bitwise(genotypes, variant_ids, conditions, active_idx, is_and):113 """Run c_apply_bitwise in parallel. Takes the same arguments."""114 N = len(genotypes)115 nprocs = mp.cpu_count()116 pool = mp.Pool(processes=nprocs)117 B = round(N/nprocs + 0.5) # batch size118 # Split variant_ids in batches (genotype batches are equally-sized, but not119 # variant ids, in case a subset was given)120 split_at = variant_ids.searchsorted([(k+1)*B+1 for k in range(nprocs-1)])121 variant_ids_batches = np.split(variant_ids, split_at)122 assert len(variant_ids_batches) == nprocs123 # Run one job for each batch124 passing = [pool.apply(c_apply_bitwise,125 args=(genotypes[k*B:(k+1)*B,:],126 variant_ids_batches[k],127 conditions, active_idx, is_and, B))128 for k in range(nprocs)]129 passing = np.concatenate(passing)130 pool.close()131 return passing132 #@timer133 def apply(self, variants=None, genotypes=None, db=None, limit=None, offset=0):134 """Apply this collection of filters on a collection of variants.135 :param variants: a VariantsCollection or a QuerySet of variants.136 If None, makes a QuerySet of the whole *db*.137 :param db: database name. 
If no set, it tries to be inferred from *variants*.138 :param genotypes: a list of genotypes arrays.139 if None, a GenotypesService is created from the variants' db.140 In principle, set it for testing purposes only.141 :rtype: FilterResult142 """143 sub_ids = None144 if variants is None and db is not None:145 variants = Variant.objects.using(db)146 elif db is None:147 db = variants.db148 if self.shortcut:149 return FilterResult(variants=VariantsCollection([]), ids=[], n_filtered=0)150 if genotypes is None:151 assert db is not None, "Either a db name or a genotypes array is required"152 genotypes = genotypes_service(db).genotypes153 else:154 assert len(genotypes) == len(variants)155 if self.val == 'x_linked':156 if isinstance(variants, VariantsCollection):157 sub_ids = np.asarray([v.variant_id for v in variants if v.chrom=='chrX'], dtype=np.uint64)158 else:159 sub_ids = genotypes_service(db).chrX160 passing = self.scan_genotypes(genotypes, sub_ids=sub_ids, db=db)161 return FilterResult(162 variants=self.variants_from_mask(variants, passing, db, limit, offset),163 ids=passing,164 n_filtered=len(passing),165 )166 @staticmethod167 def variants_from_mask(variants, passing, db=None, limit=None, offset=0):168 """Get the collection of variants which id is in *passing*."""169 if limit is not None:170 passing = passing[offset:offset+limit]171 passing = set(passing)172 return VariantsCollection([v for v in variants if v.variant_id in passing], db=db)173 def __str__(self):174 return "<Filter {}>".format(self.short_str()) + ('-'+str(self.ss) if self.ss else '')175 def __repr__(self):176 return "<Filter {}>".format(self.short_str()) + ('-'+str(self.ss) if self.ss else '')177class GenotypesFilterDoNothing(GenotypesFilter):178 """A filter that every variant passes anyway."""179 def __init__(self, ss:SamplesSelection, db=None):180 super().__init__(ss, 'nothing', db=db)181 def build_conditions_array(self):182 assert self183 return [[i, GENOTYPE_BIT_ANY] for i in self.ss.active_idx]184class GenotypesFilterActive(GenotypesFilter):185 """Return a variant only if it is mutant in at least one of the active samples.186 """187 def __init__(self, ss:SamplesSelection, db=None):188 super().__init__(ss, GENOTYPE_ACTIVE, db=db)189 self.merge_op = OR190 def build_conditions_array(self):191 return [[i, GENOTYPE_BIT_CARRIER] for i in self.ss.active_idx]192class GenotypesFilterDominant(GenotypesFilter):193 """Simplest scenario: autosomal dominant.194 Suppose the effect is dominant, i.e. one allele195 mutated is enough to observe a phenotype.196 Filter variants that are mutated in all samples but the controls.197 """198 need_groups = ["affected"]199 def __init__(self, ss:SamplesSelection, db=None):200 super().__init__(ss, GENOTYPE_DOMINANT, db=db)201 def build_conditions_array(self):202 return [[i, GENOTYPE_BIT_CARRIER] for i in self.ss.affected_idx] + \203 [[i, GENOTYPE_BIT_NON_CARRIER] for i in self.ss.not_affected_idx]204class GenotypesFilterRecessive(GenotypesFilter):205 """Suppose the effect is recessive, i.e. 
a child must inherit a mutated206 allele from both carrier parents to have an observable phenotype.207 Filter mutations that are present in both the parents and homozygous208 in the "affected" children.209 Controls ("not_affected") are samples known to be non-carriers.210 """211 need_groups = ["affected"]212 def __init__(self, ss:SamplesSelection, db=None):213 super().__init__(ss, GENOTYPE_RECESSIVE, db=db)214 def build_conditions_array(self):215 conds = [] # 1 per sample, because of its particular parents216 for s in self.ss.affected:217 idx = self.ss.idx_of(s.name, active=True)218 conds.append([idx, GENOTYPE_BIT_CARRIER_HOM])219 for i in self.ss.parents_idx_of(s):220 conds.append([i, GENOTYPE_BIT_CARRIER])221 for i in self.ss.not_affected_idx:222 conds.append([i, GENOTYPE_BIT_NOT_CARRIER_HOM])223 return conds224class GenotypesFilterDeNovo(GenotypesFilter):225 """Case where a mutation is present in a child but not in the parents.226 So the controls should be the parents, but can include other non-carriers.227 Otherwise it is the same as the Dominant case.228 """229 need_groups = ["affected"]230 need_parents = 2231 def __init__(self, ss:SamplesSelection, db=None):232 super().__init__(ss, GENOTYPE_DENOVO, db=db)233 def build_conditions_array(self):234 conds = [] # 1 per sample, because of its particular parents235 for s in self.ss.affected:236 idx = self.ss.idx_of(s.name, active=True)237 parents_idx = self.ss.parents_idx_of(s)238 if len(parents_idx) == 2: # pointless if not both parents present239 if len(set(parents_idx) & set(self.ss.affected_idx)) > 0:240 continue # pointless if one of the parents is affected241 conds.append([idx, GENOTYPE_BIT_CARRIER_HET])242 for i in parents_idx:243 conds.append([i, GENOTYPE_BIT_NON_CARRIER])244 if conds:245 for i in self.ss.not_affected_idx:246 conds.append([i, GENOTYPE_BIT_NON_CARRIER])247 return conds248class GenotypesFilterXLinked(GenotypesFilter):249 """A deleterious mutation os present on chromosome X. 
Possible cases:250 a) Dominant case: Apart from the proportion of affected children251 of each sex, it behaves exactly like a usual dominant mutation,252 so we don't cover that case here:253 - Affected <=> carrier;254 - In principle one of the parents should carry it, but it could be de novo.255 b) Recessive case:256 - Affected <=> woman carrier hom, or man carrier het;257 - For a woman, both parents must be carriers (and the father is affected);258 - For a man, only the mother must be carrier.259 """260 need_groups = ["affected"]261 need_parents = 0262 def __init__(self, ss:SamplesSelection, db=None):263 super().__init__(ss, GENOTYPE_XLINKED, db=db)264 def build_conditions_array(self):265 conds = [] # 1 per sample, because of its particular parents266 for s in self.ss.affected:267 idx = self.ss.idx_of(s.name, active=True)268 # Male: carrier het, and the mother is carrier269 if s.sex == 'M':270 conds.append([idx, GENOTYPE_BIT_CARRIER_HET])271 i = self.ss.mother_idx_of(s)272 if i is not None:273 conds.append([i, GENOTYPE_BIT_CARRIER])274 # Female: carrier hom, and both parents are carriers275 elif s.sex == 'F':276 conds.append([idx, GENOTYPE_BIT_CARRIER_HOM])277 for i in self.ss.parents_idx_of(s):278 conds.append([i, GENOTYPE_BIT_CARRIER])279 for s in self.ss.not_affected:280 idx = self.ss.idx_of(s.name, active=True)281 # Male unaffected cannot be carriers282 if s.sex == 'M':283 conds.append([idx, GENOTYPE_BIT_NON_CARRIER])284 # Female unaffected could be carrier het285 elif s.sex == 'F':286 conds.append([idx, GENOTYPE_BIT_NOT_CARRIER_HOM])287 return conds288class GenotypesFilterCompoundHeterozygous(GenotypesFilter):289 """Case where two mutations, inherited one from each parent,290 occur in the same gene and thus code for two defective proteins.291 Compose two results:292 - father is carrier in that gene and child has it;293 - mother is carrier in that same gene and child has it.294 Notes:295 - We cannot group conditions for many samples as we did before, because296 they can be touched by different compounds pairs in the same gene (rare ?).297 - Neither of the parents can be homozygous, or he would be affected (both proteins are touched).298 - A child cannot be homozygous at any position of the compounds pair, because299 that would suffice to invalidate both proteins and is indistinguishable from the300 recessive case.301 - Both parents could be affected at one position of the compounds pair (rare ?).302 """303 need_groups = ["affected"]304 need_parents = 2305 def __init__(self, ss:SamplesSelection, db=None):306 super().__init__(ss, val=GENOTYPE_COMPOUND, db=db)307 self.conditions_array = self.build_conditions_array()308 if not self.conditions_array:309 self.shortcut = True310 else:311 self.conditions_vector = self.build_compound_conditions_vector()312 def build_conditions_array(self):313 """Returns pairs of condition (paternal, maternal), one for each sample,314 in a dict {sample_name: [cond1, cond2]}.315 Make it also for non affected, because we want to find false positives searching316 as if they were affected. 
An unaffected sample could well carry one of the two variants.317 """318 conds = {}319 # Common condition: all affected are carriers het, and no unaffected can be homozygous320 base_cond = [(i, GENOTYPE_BIT_NOT_CARRIER_HOM) for i in self.ss.not_affected_idx] \321 + [(i, GENOTYPE_BIT_CARRIER_HET) for i in self.ss.affected_idx]322 for s in self.ss.active:323 idx = self.ss.idx_of(s.name, active=True)324 father_idx = self.ss.father_idx_of(s)325 mother_idx = self.ss.mother_idx_of(s)326 if father_idx is None or mother_idx is None:327 continue328 if father_idx in self.ss.affected_idx or mother_idx in self.ss.affected_idx:329 continue # pointless if one of the parents is affected330 # Father carrier331 c1 = base_cond + [332 (idx, GENOTYPE_BIT_CARRIER_HET), # in case it is not affected, but we simulate for false positives333 (father_idx, GENOTYPE_BIT_CARRIER),334 (mother_idx, GENOTYPE_BIT_NON_CARRIER),335 ]336 # Mother carrier337 c2 = base_cond + [338 (idx, GENOTYPE_BIT_CARRIER_HET),339 (father_idx, GENOTYPE_BIT_NON_CARRIER),340 (mother_idx, GENOTYPE_BIT_CARRIER),341 ]342 # Note: c1 and c2 cannot both be true at the same genomic position343 c1 = tuple(merge_conditions_array(c1))344 c2 = tuple(merge_conditions_array(c2))345 conds[s.name] = (c1, c2)346 # Remove duplicate conditions to speed it up347 seen = set()348 dups = set()349 for k,v in conds.items():350 if v in seen:351 dups.add(k)352 else:353 seen.add(v)354 for name in dups:355 conds.pop(name)356 return conds357 def build_compound_conditions_vector(self):358 """Extend *self.build_conditions_vector()* to apply it to all sub-elements359 *c1*,*c2* of the more complicated {sample: [c1, c2]} of the compound case."""360 conditions = {}361 for sample, conds in self.conditions_array.items():362 conditions[sample] = [None,None]363 conditions[sample][0] = self.build_conditions_vector(conds[0])364 conditions[sample][1] = self.build_conditions_vector(conds[1])365 return conditions366 def apply(self, variants=None, genotypes=None, db=None, limit=None, offset=0, sub_ids=None, parallel=True):367 """:param sub_ids: does nothing, just inheritance"""368 if self.shortcut:369 return FilterResult(variants=VariantsCollection([]), ids=[], n_filtered=0)370 if variants is None and db is not None:371 variants = Variant.objects.using(db)372 elif db is None:373 db = variants.db374 if db is None:375 batches = {gene: np.array([v.variant_id for v in var], dtype=np.uint64)376 for gene,var in itertools.groupby(variants, key=attrgetter('gene_symbol'))}377 else:378 gs = genotypes_service(db)379 batches = gs.variant_ids_batches_by_gene380 if genotypes is None:381 assert db is not None, "Either a db name or a genotypes array is required"382 genotypes = genotypes_service(db).genotypes383 else:384 assert len(genotypes) == len(variants)385 passing, sources, pairs = self.scan_genotypes_compound(genotypes, batches, parallel)386 variants = self.variants_from_mask(variants, passing, db, limit, offset)387 for v in variants:388 set_source(v, sources[v.variant_id])389 return FilterResult(390 variants=variants,391 ids=passing,392 n_filtered=len(passing),393 )394 def scan_genotypes_compound(self, genotypes, batches, parallel=True):395 """Scan the *genotypes* array for compounds. 
Variant ids are treated in batches,396 - one list of variant_ids per gene."""397 if self.shortcut:398 passing, sources, pairs = np.zeros(0), {}, []399 else:400 N = len(genotypes)401 active_idx = np.asarray(self.ss.active_idx, dtype=np.uint16)402 batches = list(batches.items())403 if parallel:404 passing, sources, pairs = self.parallel_batches(genotypes, batches, active_idx, N)405 else:406 passing, sources, pairs = self.process_batches(genotypes, batches, active_idx, N)407 passing = np.array(list(passing), dtype=np.uint64)408 passing.sort()409 return passing, sources, pairs410 def parallel_batches(self, genotypes, batches, active_idx, N):411 """Parallelize the scanning of genotypes for compounds over groups of genes."""412 passing = set()413 sources = {}414 pairs = []415 nprocs = mp.cpu_count()416 NB = len(batches)417 B = round(NB/nprocs + 0.5) # batch size418 split_batches = [batches[k*B:(k+1)*B] for k in range(nprocs)]419 if DEBUG and 0:420 print(" @parallel_batches {} CPUs: {}".format(nprocs, [len(x) for x in split_batches]))421 pool = mp.Pool(processes=nprocs)422 res = [pool.apply_async(self.process_batches,423 args=(np.copy(genotypes), list(split_batches[k]), np.copy(active_idx), N))424 for k in range(nprocs)]425 output = [x.get() for x in res]426 for x in output:427 passing |= x[0]428 sources.update(x[1])429 pairs += x[2]430 pool.close()431 return passing, sources, pairs432 def process_batches(self, genotypes, batches, active_idx, N):433 """Search a batch of genes for compounds."""434 passing = set()435 sources = {}436 pairs = []437 tbatch = 0438 for gene,variant_ids in batches:439 t1 = time()440 local_passing, local_sources, local_pairs = self.process_1_batch(variant_ids, genotypes, active_idx, N)441 t2 = time()442 tbatch += t2-t1443 passing |= local_passing444 pairs += local_pairs445 sources.update(local_sources)446 if DEBUG and 0:447 print(" Processed batches in {:.3f}s ({} passing)".format(tbatch,len(passing)))448 return passing, sources, pairs449 def process_1_batch(self, variant_ids, genotypes, active_idx, N):450 """Search 1 gene for compounds. 
Return:451 local_passing: set of variant_ids passing the filter452 local_sources: dict `{variant_id: 'paternal'/'maternal'}`453 local_pairs: list of compound pairs `(variant_id1, variant_id2)`454 """455 # Check that all affected samples have the compound456 local_passing_mother = set()457 local_passing_father = set()458 local_sources = {}459 for affected in self.ss.affected:460 if affected.name not in self.conditions_vector:461 continue462 conds = self.conditions_vector[affected.name]463 passing_father = set(c_apply_bitwise(genotypes, variant_ids, conds[0], active_idx, True, N))464 passing_mother = set(c_apply_bitwise(genotypes, variant_ids, conds[1], active_idx, True, N))465 # Exclude compounds that healthy samples carry as well466 if len(passing_father) > 0 and len(passing_mother) > 0:467 fp1 = set()468 fp2 = set()469 local_ids = np.array(list(passing_father | passing_mother), dtype=np.uint64)470 for healthy in self.ss.not_affected:471 if healthy.name not in self.conditions_vector:472 continue473 conds = np.asarray(self.conditions_vector[healthy.name], dtype=np.uint8)474 false_father = c_apply_bitwise(genotypes, local_ids, conds[0], active_idx, True, N)475 false_mother = c_apply_bitwise(genotypes, local_ids, conds[1], active_idx, True, N)476 false_pairs = list(itertools.product(false_father, false_mother))477 for p1, p2 in false_pairs:478 if p1 in passing_father and p2 in passing_mother:479 fp1.add(p1)480 fp2.add(p2)481 passing_father = passing_father - fp1482 passing_mother = passing_mother - fp2483 # If there are any left in both lists, add them to the result set484 if len(passing_father) > 0 and len(passing_mother) > 0:485 for k in passing_father:486 local_sources[k] = 'paternal'487 for k in passing_mother:488 local_sources[k] = 'maternal'489 if len(local_passing_father) == 0:490 local_passing_father = passing_father491 else:492 local_passing_father &= passing_father493 if len(local_passing_mother) == 0:494 local_passing_mother = passing_mother495 else:496 local_passing_mother &= passing_mother497 # All affected samples must have at least one of the combinations498 else:499 local_passing_father = set()500 local_passing_mother = set()501 local_sources = {}502 break # go to next gene503 local_passing = local_passing_father | local_passing_mother504 local_pairs = list(itertools.product(505 map(int,local_passing_father), # map to int because of new numpy warning when used as index506 map(int,local_passing_mother)507 ))...
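
The core trick in this filter is the condition-merging step: when several constraints target the same sample, they are AND-ed into a single, strongest bit mask. Below is a minimal, self-contained sketch of that idea; the bit values are hypothetical stand-ins for the varapp genotype constants, not the real ones.

# Minimal sketch of merge_conditions_array with made-up genotype bit flags.
import itertools
from functools import reduce
from operator import itemgetter, __and__

def merge_conditions_array(conds):
    merged = []
    if not conds:
        return merged
    conds.sort(key=itemgetter(0))
    for idx, group in itertools.groupby(conds, itemgetter(0)):
        genbits = [bit for _, bit in group]
        merged.append((idx, reduce(__and__, genbits)))  # intersect the bit masks
    return merged

CARRIER = 0b0110      # hypothetical: het or hom carrier
CARRIER_HOM = 0b0100  # hypothetical: hom carrier only (stronger constraint)

# Two constraints on sample 0 collapse to the strongest common bits.
print(merge_conditions_array([(0, CARRIER), (0, CARRIER_HOM), (3, CARRIER)]))
# -> [(0, 4), (3, 6)]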

School_Analysis.py

Source: School_Analysis.py (GitHub)

#!/usr/bin/env python
# coding: utf-8

# # PyCity Schools Analysis
#
# * As a whole, schools with higher budgets, did not yield better test results. By contrast, schools with higher spending per student actually (\$645-675) underperformed compared to schools with smaller budgets (<\$585 per student).
#
# * As a whole, smaller and medium sized schools dramatically out-performed large sized schools on passing math performances (89-91% passing vs 67%).
#
# * As a whole, charter schools out-performed the public district schools across all metrics. However, more analysis will be required to glean if the effect is due to school practices or the fact that charter schools tend to serve smaller student populations per school.
# ---

# In[1]:

# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

# ## District Summary

# In[2]:

# Calculate the Totals (Schools and Students)
school_count = len(school_data_complete["school_name"].unique())
student_count = school_data_complete["Student ID"].count()

# Calculate the Total Budget
total_budget = school_data["budget"].sum()

# Calculate the Average Scores
average_math_score = school_data_complete["math_score"].mean()
average_reading_score = school_data_complete["reading_score"].mean()
overall_passing_rate = (average_math_score + average_reading_score) / 2

# Calculate the Percentage Pass Rates
passing_math_count = school_data_complete[(school_data_complete["math_score"] >= 70)].count()["student_name"]
passing_math_percentage = passing_math_count / float(student_count) * 100
passing_reading_count = school_data_complete[(school_data_complete["reading_score"] >= 70)].count()["student_name"]
passing_reading_percentage = passing_reading_count / float(student_count) * 100

# Minor Data Cleanup
district_summary = pd.DataFrame({"Total Schools": [school_count],
                                 "Total Students": [student_count],
                                 "Total Budget": [total_budget],
                                 "Average Math Score": [average_math_score],
                                 "Average Reading Score": [average_reading_score],
                                 "% Passing Math": [passing_math_percentage],
                                 "% Passing Reading": [passing_reading_percentage],
                                 "% Overall Passing Rate": [overall_passing_rate]})
district_summary = district_summary[["Total Schools", "Total Students", "Total Budget",
                                     "Average Math Score",
                                     "Average Reading Score",
                                     "% Passing Math",
                                     "% Passing Reading",
                                     "% Overall Passing Rate"]]
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)

# Display the data frame
district_summary

# ## School Summary

# In[3]:

# Determine the School Type
school_types = school_data.set_index(["school_name"])["type"]

# Calculate the total student count
per_school_counts = school_data_complete["school_name"].value_counts()

# Calculate the total school budget and per capita spending
# per_school_budget = school_data_complete.groupby(["school_name"]).mean()["budget"]
per_school_budget = school_data_complete.groupby(["school_name"]).mean()["budget"]
per_school_capita = per_school_budget / per_school_counts

# Calculate the average test scores
per_school_math = school_data_complete.groupby(["school_name"]).mean()["math_score"]
per_school_reading = school_data_complete.groupby(["school_name"]).mean()["reading_score"]

# Calculate the passing scores by creating a filtered data frame
school_passing_math = school_data_complete[(school_data_complete["math_score"] >= 70)]
school_passing_reading = school_data_complete[(school_data_complete["reading_score"] >= 70)]
per_school_passing_math = school_passing_math.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100
per_school_passing_reading = school_passing_reading.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100
overall_passing_rate = (per_school_passing_math + per_school_passing_reading) / 2

# Convert to data frame
per_school_summary = pd.DataFrame({"School Type": school_types,
                                   "Total Students": per_school_counts,
                                   "Total School Budget": per_school_budget,
                                   "Per Student Budget": per_school_capita,
                                   "Average Math Score": per_school_math,
                                   "Average Reading Score": per_school_reading,
                                   "% Passing Math": per_school_passing_math,
                                   "% Passing Reading": per_school_passing_reading,
                                   "% Overall Passing Rate": overall_passing_rate})

# Minor data munging
per_school_summary = per_school_summary[["School Type", "Total Students", "Total School Budget", "Per Student Budget",
                                         "Average Math Score", "Average Reading Score",
                                         "% Passing Math", "% Passing Reading",
                                         "% Overall Passing Rate"]]
per_school_summary["Total School Budget"] = per_school_summary["Total School Budget"].map("${:,.2f}".format)
per_school_summary["Per Student Budget"] = per_school_summary["Per Student Budget"].map("${:,.2f}".format)

# Display the data frame
per_school_summary

# ## Top Performing Schools (By Passing Rate)

# In[4]:

# Sort and show top five schools
top_schools = per_school_summary.sort_values(["% Overall Passing Rate"], ascending=False)
top_schools.head(5)

# ## Bottom Performing Schools (By Passing Rate)

# In[5]:

# Sort and show bottom five schools
bottom_schools = per_school_summary.sort_values(["% Overall Passing Rate"], ascending=True)
bottom_schools.head(5)

# ## Math Scores by Grade

# In[6]:

# Create data series of scores by grade levels using conditionals
ninth_graders = school_data_complete[(school_data_complete["grade"] == "9th")]
tenth_graders = school_data_complete[(school_data_complete["grade"] == "10th")]
eleventh_graders = school_data_complete[(school_data_complete["grade"] == "11th")]
twelfth_graders = school_data_complete[(school_data_complete["grade"] == "12th")]

# Group each by school name
ninth_graders_scores = ninth_graders.groupby(["school_name"]).mean()["math_score"]
tenth_graders_scores = tenth_graders.groupby(["school_name"]).mean()["math_score"]
eleventh_graders_scores = eleventh_graders.groupby(["school_name"]).mean()["math_score"]
twelfth_graders_scores = twelfth_graders.groupby(["school_name"]).mean()["math_score"]

# Combine series into single data frame
scores_by_grade = pd.DataFrame({"9th": ninth_graders_scores, "10th": tenth_graders_scores,
                                "11th": eleventh_graders_scores, "12th": twelfth_graders_scores})

# Minor data munging
scores_by_grade = scores_by_grade[["9th", "10th", "11th", "12th"]]
scores_by_grade.index.name = None

# Display the data frame
scores_by_grade

# ## Reading Score by Grade

# In[7]:

# Create data series of scores by grade levels using conditionals
ninth_graders = school_data_complete[(school_data_complete["grade"] == "9th")]
tenth_graders = school_data_complete[(school_data_complete["grade"] == "10th")]
eleventh_graders = school_data_complete[(school_data_complete["grade"] == "11th")]
twelfth_graders = school_data_complete[(school_data_complete["grade"] == "12th")]

# Group each by school name
ninth_graders_scores = ninth_graders.groupby(["school_name"]).mean()["reading_score"]
tenth_graders_scores = tenth_graders.groupby(["school_name"]).mean()["reading_score"]
eleventh_graders_scores = eleventh_graders.groupby(["school_name"]).mean()["reading_score"]
twelfth_graders_scores = twelfth_graders.groupby(["school_name"]).mean()["reading_score"]

# Combine series into single data frame
scores_by_grade = pd.DataFrame({"9th": ninth_graders_scores, "10th": tenth_graders_scores,
                                "11th": eleventh_graders_scores, "12th": twelfth_graders_scores})

# Minor data munging
scores_by_grade = scores_by_grade[["9th", "10th", "11th", "12th"]]
scores_by_grade.index.name = None

# Display the data frame
scores_by_grade

# ## Scores by School Spending

# In[8]:

# Establish the bins
spending_bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

# Categorize the spending based on the bins
per_school_summary["Spending Ranges (Per Student)"] = pd.cut(per_school_capita, spending_bins, labels=group_names)
spending_math_scores = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["Average Math Score"]
spending_reading_scores = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["Average Reading Score"]
spending_passing_math = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Math"]
spending_passing_reading = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Reading"]
overall_passing_rate = (spending_passing_math + spending_passing_reading) / 2

# Assemble into data frame
spending_summary = pd.DataFrame({"Average Math Score": spending_math_scores,
                                 "Average Reading Score": spending_reading_scores,
                                 "% Passing Math": spending_passing_math,
                                 "% Passing Reading": spending_passing_reading,
                                 "% Overall Passing Rate": overall_passing_rate})

# Minor data munging
spending_summary = spending_summary[["Average Math Score",
                                     "Average Reading Score",
                                     "% Passing Math", "% Passing Reading",
                                     "% Overall Passing Rate"]]

# Display results
spending_summary

# ## Scores by School Size

# In[9]:

# Establish the bins
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

# Categorize the spending based on the bins
per_school_summary["School Size"] = pd.cut(per_school_summary["Total Students"], size_bins, labels=group_names)

# Calculate the scores based on bins
size_math_scores = per_school_summary.groupby(["School Size"]).mean()["Average Math Score"]
size_reading_scores = per_school_summary.groupby(["School Size"]).mean()["Average Reading Score"]
size_passing_math = per_school_summary.groupby(["School Size"]).mean()["% Passing Math"]
size_passing_reading = per_school_summary.groupby(["School Size"]).mean()["% Passing Reading"]
overall_passing_rate = (size_passing_math + size_passing_reading) / 2

# Assemble into data frame
size_summary = pd.DataFrame({"Average Math Score": size_math_scores,
                             "Average Reading Score": size_reading_scores,
                             "% Passing Math": size_passing_math,
                             "% Passing Reading": size_passing_reading,
                             "% Overall Passing Rate": overall_passing_rate})

# Minor data munging
size_summary = size_summary[["Average Math Score",
                             "Average Reading Score",
                             "% Passing Math", "% Passing Reading",
                             "% Overall Passing Rate"]]

# Display results
size_summary

# ## Scores by School Type

# In[10]:

# Type | Average Math Score | Average Reading Score | % Passing Math | % Passing Reading | % Overall Passing Rate
type_math_scores = per_school_summary.groupby(["School Type"]).mean()["Average Math Score"]
type_reading_scores = per_school_summary.groupby(["School Type"]).mean()["Average Reading Score"]
type_passing_math = per_school_summary.groupby(["School Type"]).mean()["% Passing Math"]
type_passing_reading = per_school_summary.groupby(["School Type"]).mean()["% Passing Reading"]
overall_passing_rate = (type_passing_math + type_passing_reading) / 2

# Assemble into data frame
type_summary = pd.DataFrame({"Average Math Score": type_math_scores,
                             "Average Reading Score": type_reading_scores,
                             "% Passing Math": type_passing_math,
                             "% Passing Reading": type_passing_reading,
                             "% Overall Passing Rate": overall_passing_rate})

# Minor data munging
type_summary = type_summary[["Average Math Score",
                             "Average Reading Score",
                             "% Passing Math",
                             "% Passing Reading",
                             "% Overall Passing Rate"]]

# Display results
...

test_pass_by_reference_or_value.py

Source: test_pass_by_reference_or_value.py (GitHub)

from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import unittest
from jnius import autoclass

class PassByReferenceOrValueTest(unittest.TestCase):

    def _verify(self, numbers, changed):
        for i in range(len(numbers)):
            self.assertEqual(numbers[i], i * i if changed else i)

    def _verify_all(self, numbers, changed):
        for n, c in zip(numbers, changed):
            self._verify(n, c)

    def test_single_param_static(self):
        VariablePassing = autoclass('org.jnius.VariablePassing')
        # passed by reference (default), numbers should change
        numbers = list(range(10))
        VariablePassing.singleParamStatic(numbers)
        self._verify(numbers, True)
        # passed by reference, numbers should change
        numbers = list(range(10))
        VariablePassing.singleParamStatic(numbers, pass_by_reference=True)
        self._verify(numbers, True)
        # passed by value, numbers should not change
        numbers = list(range(10))
        VariablePassing.singleParamStatic(numbers, pass_by_reference=False)
        self._verify(numbers, False)

    def test_single_param(self):
        VariablePassing = autoclass('org.jnius.VariablePassing')
        variablePassing = VariablePassing()
        # passed by reference (default), numbers should change
        numbers = list(range(10))
        variablePassing.singleParam(numbers)
        self._verify(numbers, True)
        # passed by reference, numbers should change
        numbers = list(range(10))
        variablePassing.singleParam(numbers, pass_by_reference=True)
        self._verify(numbers, True)
        # passed by value, numbers should not change
        numbers = list(range(10))
        variablePassing.singleParam(numbers, pass_by_reference=False)
        self._verify(numbers, False)

    def test_multiple_params_static(self):
        VariablePassing = autoclass('org.jnius.VariablePassing')
        # passed by reference (default), all numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        VariablePassing.multipleParamsStatic(*numbers)
        self._verify_all(numbers, [True] * 4)
        # passed by reference, all numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        VariablePassing.multipleParamsStatic(*numbers, pass_by_reference=True)
        self._verify_all(numbers, [True] * 4)
        # passed by value, no numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        VariablePassing.multipleParamsStatic(*numbers, pass_by_reference=False)
        self._verify_all(numbers, [False] * 4)
        # only the first set of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        VariablePassing.multipleParamsStatic(*numbers, pass_by_reference=[True, False])
        self._verify_all(numbers, [True, False, False, False])
        # only the first set of numbers should not change
        numbers = [list(range(10)) for _ in range(4)]
        VariablePassing.multipleParamsStatic(*numbers, pass_by_reference=[False, True])
        self._verify_all(numbers, [False, True, True, True])
        # only the odd sets of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        changed = (True, False, True, False)
        VariablePassing.multipleParamsStatic(*numbers, pass_by_reference=changed)
        self._verify_all(numbers, changed)
        # only the even sets of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        changed = (False, True, False, True)
        VariablePassing.multipleParamsStatic(*numbers, pass_by_reference=changed)
        self._verify_all(numbers, changed)

    def test_multiple_params(self):
        VariablePassing = autoclass('org.jnius.VariablePassing')
        variablePassing = VariablePassing()
        # passed by reference (default), all numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing.multipleParams(*numbers)
        self._verify_all(numbers, [True] * 4)
        # passed by reference, all numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing.multipleParams(*numbers, pass_by_reference=True)
        self._verify_all(numbers, [True] * 4)
        # passed by value, no numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing.multipleParams(*numbers, pass_by_reference=False)
        self._verify_all(numbers, [False] * 4)
        # only the first set of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing.multipleParams(*numbers, pass_by_reference=[True, False])
        self._verify_all(numbers, [True, False, False, False])
        # only the first set of numbers should not change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing.multipleParams(*numbers, pass_by_reference=[False, True])
        self._verify_all(numbers, [False, True, True, True])
        # only the odd sets of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        changed = (True, False, True, False)
        variablePassing.multipleParams(*numbers, pass_by_reference=changed)
        self._verify_all(numbers, changed)
        # only the even sets of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        changed = (False, True, False, True)
        variablePassing.multipleParams(*numbers, pass_by_reference=changed)
        self._verify_all(numbers, changed)

    def test_contructor_single_param(self):
        VariablePassing = autoclass('org.jnius.VariablePassing')
        # passed by reference (default), numbers should change
        numbers = list(range(10))
        variablePassing = VariablePassing(numbers)
        self._verify(numbers, True)
        # passed by reference, numbers should change
        numbers = list(range(10))
        variablePassing = VariablePassing(numbers, pass_by_reference=True)
        self._verify(numbers, True)
        # passed by value, numbers should not change
        numbers = list(range(10))
        variablePassing = VariablePassing(numbers, pass_by_reference=False)
        self._verify(numbers, False)

    def test_contructor_multiple_params(self):
        VariablePassing = autoclass('org.jnius.VariablePassing')
        # passed by reference (default), all numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing = VariablePassing(*numbers)
        self._verify_all(numbers, [True] * 4)
        # passed by reference, all numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing = VariablePassing(*numbers, pass_by_reference=True)
        self._verify_all(numbers, [True] * 4)
        # passed by value, no numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing = VariablePassing(*numbers, pass_by_reference=False)
        self._verify_all(numbers, [False] * 4)
        # only the first set of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing = VariablePassing(*numbers, pass_by_reference=[True, False])
        self._verify_all(numbers, [True, False, False, False])
        # only the first set of numbers should not change
        numbers = [list(range(10)) for _ in range(4)]
        variablePassing = VariablePassing(*numbers, pass_by_reference=[False, True])
        self._verify_all(numbers, [False, True, True, True])
        # only the odd sets of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        changed = (True, False, True, False)
        variablePassing = VariablePassing(*numbers, pass_by_reference=changed)
        self._verify_all(numbers, changed)
        # only the even sets of numbers should change
        numbers = [list(range(10)) for _ in range(4)]
        changed = (False, True, False, True)
        variablePassing = VariablePassing(*numbers, pass_by_reference=changed)
...
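
What these tests exercise through pyjnius is, on the Python side, ordinary pass-by-object-reference: handing a list to a call lets the callee mutate it in place, while passing a copy leaves the caller's list untouched. A pure-Python sketch of the same behaviour (no Java or jnius involved; the function name is illustrative):

def square_in_place(numbers):
    # Mutates the caller's list: Python passes object references, not copies.
    for i, n in enumerate(numbers):
        numbers[i] = n * n

numbers = list(range(10))
square_in_place(numbers)        # caller sees the change
assert numbers[3] == 9

numbers = list(range(10))
square_in_place(list(numbers))  # pass a copy -> caller's list is unchanged
assert numbers[3] == 3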

database.py

Source: database.py (GitHub)

import sqlite3
import pandas as pd

con = sqlite3.connect("https://drive.google.com/file/d/1nJ-AvjMkAY0e8tV3iONIcE96psd1eiVi/view?usp=sharing")
cur = con.cursor()

def get_all_countries():
    return pd.read_sql('SELECT * FROM Country;', con)

def get_country_leagues(country_name):
    query = f'SELECT DISTINCT l.name FROM League l ' \
            f'JOIN Country c ON l.country_id = c.id ' \
            f'WHERE c.name = "{country_name}" ORDER BY l.name ASC'
    return pd.read_sql(query, con)

def get_league_teams(league_name):
    query = f'SELECT DISTINCT l.name , t.team_long_name FROM Match m JOIN League l ' \
            f'ON m.league_id = l.id JOIN Team t ON m.home_team_api_id = t.team_api_id WHERE' \
            f' l.name = "{league_name}"' \
            f'ORDER BY t.team_long_name ASC;'
    return pd.read_sql(query, con)

def get_match_predictors():
    query = f'SELECT CASE WHEN home_team_goal > away_team_goal THEN 1 WHEN home_team_goal < away_team_goal THEN 2 ' \
            f'ELSE 0 END AS Match_Outcome, ht_buildUpPlaySpeed, ht_buildUpPlayDribbling, ht_buildUpPlayPassing, ht_chanceCreationPassing, ht_chanceCreationCrossing,' \
            f' ht_chanceCreationShooting, ht_defencePressure, ht_defenceAggression, ht_defenceTeamWidth, at_buildUpPlaySpeed, at_buildUpPlayDribbling,' \
            f' at_buildUpPlayPassing,' \
            f'at_chanceCreationPassing, at_chanceCreationCrossing, at_chanceCreationShooting, at_defencePressure, at_defenceAggression, at_defenceTeamWidth ' \
            f'FROM Match m JOIN ( SELECT team_api_id, AVG(buildUpPlaySpeed) AS ht_buildUpPlaySpeed,' \
            f'AVG(buildUpPlayDribbling) AS ht_buildUpPlayDribbling, AVG(buildUpPlayPassing) AS ht_buildUpPlayPassing, ' \
            f'AVG(chanceCreationPassing) AS ht_chanceCreationPassing, AVG(chanceCreationCrossing) AS ht_chanceCreationCrossing,' \
            f'AVG(chanceCreationShooting) AS ht_chanceCreationShooting,' \
            f'AVG(defencePressure) AS ht_defencePressure, ' \
            f'AVG(defenceAggression) AS ht_defenceAggression, AVG(defenceTeamWidth) AS ht_defenceTeamWidth FROM Team_Attributes' \
            f' GROUP BY team_api_id ) ht_attr ON ht_attr.team_api_id = home_team_api_id JOIN ' \
            f'(SELECT team_api_id, AVG(buildUpPlaySpeed) AS at_buildUpPlaySpeed, AVG(buildUpPlayDribbling)' \
            f' AS at_buildUpPlayDribbling, AVG(buildUpPlayPassing) AS at_buildUpPlayPassing, ' \
            f'AVG(chanceCreationPassing) AS at_chanceCreationPassing, AVG(chanceCreationCrossing)' \
            f' AS at_chanceCreationCrossing, AVG(chanceCreationShooting) AS at_chanceCreationShooting, ' \
            f' AVG(defencePressure) AS at_defencePressure, AVG(defenceAggression) AS at_defenceAggression, ' \
            f'AVG(defenceTeamWidth) AS at_defenceTeamWidth FROM Team_Attributes GROUP BY team_api_id ) ' \
            f' at_attr ON at_attr.team_api_id = away_team_api_id;'
    return pd.read_sql(query, con)

def get_team_predictors(home_team_name, away_team_name):
    query = f'SELECT ht_buildUpPlaySpeed, ht_buildUpPlayDribbling, ht_buildUpPlayPassing, ht_chanceCreationPassing,' \
            f' ht_chanceCreationCrossing, ht_chanceCreationShooting, ht_defencePressure, ht_defenceAggression, ht_defenceTeamWidth, ' \
            f' at_buildUpPlaySpeed, at_buildUpPlayDribbling, at_buildUpPlayPassing, at_chanceCreationPassing, at_chanceCreationCrossing, at_chanceCreationShooting, ' \
            f'at_defencePressure, at_defenceAggression, at_defenceTeamWidth ' \
            f' FROM (SELECT AVG(buildUpPlaySpeed) AS ht_buildUpPlaySpeed, AVG(buildUpPlayDribbling) AS ht_buildUpPlayDribbling, AVG(buildUpPlayPassing) ' \
            f'AS ht_buildUpPlayPassing,AVG(chanceCreationPassing) AS ht_chanceCreationPassing, ' \
            f'AVG(chanceCreationCrossing) AS ht_chanceCreationCrossing, AVG(chanceCreationShooting) AS ht_chanceCreationShooting, ' \
            f'AVG(defencePressure) AS ht_defencePressure, AVG(defenceAggression) AS ht_defenceAggression, AVG(defenceTeamWidth) AS ht_defenceTeamWidth ' \
            f'FROM Team_Attributes home_attr JOIN Team home_team ' \
            f' ON home_attr.team_api_id = home_team.team_api_id ' \
            f' WHERE team_long_name = "{home_team_name}" ) ht_attr JOIN ( ' \
            f' SELECT AVG(buildUpPlaySpeed) AS at_buildUpPlaySpeed, AVG(buildUpPlayDribbling) AS at_buildUpPlayDribbling, AVG(buildUpPlayPassing) ' \
            f'AS at_buildUpPlayPassing, AVG(chanceCreationPassing) ' \
            f'AS at_chanceCreationPassing, AVG(chanceCreationCrossing) AS at_chanceCreationCrossing,' \
            f' AVG(chanceCreationShooting) AS at_chanceCreationShooting, AVG(defencePressure) AS at_defencePressure, AVG(defenceAggression) ' \
            f'AS at_defenceAggression, AVG(defenceTeamWidth) AS at_defenceTeamWidth ' \
            f'FROM Team_Attributes away_attr JOIN Team away_team ON away_attr.team_api_id = away_team.team_api_id ' \
            f'WHERE team_long_name = "{away_team_name}" ) at_attr ON 1=1 ;'
...
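
Two caveats worth noting when reusing this module: sqlite3.connect expects a local file path (it will not download the Google Drive URL used above), and building SQL with f-strings leaves the queries open to injection. Below is a small sketch of the same league lookup with a bound parameter, assuming the database has been saved locally as database.sqlite (hypothetical path).

# Parameterized variant of get_country_leagues (sqlite3 '?' placeholders)
import sqlite3
import pandas as pd

con = sqlite3.connect("database.sqlite")  # hypothetical local copy of the soccer DB

def get_country_leagues(country_name):
    query = ('SELECT DISTINCT l.name FROM League l '
             'JOIN Country c ON l.country_id = c.id '
             'WHERE c.name = ? ORDER BY l.name ASC')
    return pd.read_sql(query, con, params=(country_name,))

# Example: leagues = get_country_leagues("England")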

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run Robot Framework automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!

Get 100 minutes of automation testing FREE!

Next-Gen App & Browser Testing Cloud
