Best Python code snippet using lisa_python
PCA_tools.py
Source:PCA_tools.py  
1"""2=========3PCA Tools4=========5A set of tools for PCA analysis, singular value decomposition,6total least squares, and other linear fitting methods.7Running this code independently tests the fitting functions with different8types of random data.9"""10import numpy11  12def efuncs(arr, return_others=False):13    """14    Determine eigenfunctions of an array for use with15    PCA cleaning16    """17    if hasattr(arr,'filled'):18        arr = arr.filled(0)19    covmat = numpy.dot(arr.T,arr)20    evals,evects = numpy.linalg.eigh(covmat)21    inds = np.argsort(evals)[::-1]22    evals = evals[inds]23    evects = evects[:,inds]24    efuncarr = numpy.dot(arr,evects)25    if return_others:26        return efuncarr,covmat,evals,evects27    else:28        return efuncarr29def PCA_linear_fit(data1, data2, print_results=False, ignore_nans=True):30    """31    Use principal component analysis to determine the best linear fit to the data.32    data1 - x array33    data2 - y array34    returns m,b in the equation y = m x + b35    print tells you some information about what fraction of the variance is accounted for36    ignore_nans will remove NAN values from BOTH arrays before computing37    Although this works well for the tests below, it fails horrifically on some38    rather well-behaved data sets.  I don't understand why this is, but that's39    why I wrote the total_least_squares SVD code below.40    """41    if ignore_nans:42        badvals = numpy.isnan(data1) + numpy.isnan(data2)43        if badvals.sum():44            data1 = data1[True-badvals]45            data2 = data2[True-badvals]46    47    arr = numpy.array([data1-data1.mean(),data2-data2.mean()])48    covmat = numpy.dot(arr,arr.T)49    evals,evects = numpy.linalg.eig(covmat)50    max_ind = evals.argmax()51    if max_ind:52        evects = evects[::-1,::-1]53    m = evects[1,0] / evects[0,0]54    b = data2.mean() - m*data1.mean()55    varfrac = evals[max_ind]/evals.sum()*100.56    if varfrac < 50:57        raise ValueError("ERROR: PCA Linear Fit accounts for less than half the variance; this is impossible by definition.")58    if print_results:59        print "PCA Best fit y = %g x + %g" % (m,b)60        print "The fit accounts for %0.3g%% of the variance." % (varfrac)61        print "Chi^2 = %g, N = %i" % (((data2-(data1*m+b))**2).sum(),data1.shape[0]-2)62    return m,b63def total_least_squares(data1, data2, data1err=None, data2err=None,64        print_results=False, ignore_nans=True, intercept=True,65        return_error=False, inf=1e10):66    """67    Use Singular Value Decomposition to determine the Total Least Squares linear fit to the data.68    (e.g. 
def PCA_linear_fit(data1, data2, print_results=False, ignore_nans=True):
    """
    Use principal component analysis to determine the best linear fit to the data.
    data1 - x array
    data2 - y array
    returns m,b in the equation y = m x + b
    print_results tells you some information about what fraction of the
    variance is accounted for
    ignore_nans will remove NAN values from BOTH arrays before computing
    Although this works well for the tests below, it fails horrifically on some
    rather well-behaved data sets.  I don't understand why this is, but that's
    why I wrote the total_least_squares SVD code below.
    """
    if ignore_nans:
        badvals = numpy.isnan(data1) | numpy.isnan(data2)
        if badvals.sum():
            data1 = data1[~badvals]
            data2 = data2[~badvals]

    arr = numpy.array([data1 - data1.mean(), data2 - data2.mean()])
    covmat = numpy.dot(arr, arr.T)
    evals, evects = numpy.linalg.eig(covmat)
    max_ind = evals.argmax()
    # slope of the principal eigenvector, i.e. the column of `evects` with
    # the largest eigenvalue (the original column-swapping hack inverted the
    # slope when the principal component landed in column 1)
    m = evects[1, max_ind] / evects[0, max_ind]
    b = data2.mean() - m * data1.mean()
    varfrac = evals[max_ind] / evals.sum() * 100.
    if varfrac < 50:
        raise ValueError("ERROR: PCA Linear Fit accounts for less than half "
                         "the variance; this is impossible by definition.")
    if print_results:
        print("PCA Best fit y = %g x + %g" % (m, b))
        print("The fit accounts for %0.3g%% of the variance." % (varfrac))
        print("Chi^2 = %g, N = %i" % (((data2 - (data1 * m + b)) ** 2).sum(),
                                      data1.shape[0] - 2))
    return m, b
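# Usage sketch (illustrative; synthetic data): PCA_linear_fit should recover
# the slope and intercept of a lightly perturbed line.
def _demo_PCA_linear_fit():
    x = numpy.linspace(0, 100, 100)
    y = 2.5 * x + 1.0 + numpy.random.randn(100)
    m, b = PCA_linear_fit(x, y)  # expect m close to 2.5, b close to 1.0
    return m, b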
def total_least_squares(data1, data2, data1err=None, data2err=None,
                        print_results=False, ignore_nans=True, intercept=True,
                        return_error=False, inf=1e10):
    """
    Use Singular Value Decomposition to determine the Total Least Squares
    linear fit to the data.
    (e.g. http://en.wikipedia.org/wiki/Total_least_squares)
    data1 - x array
    data2 - y array
    if intercept:
        returns m,b in the equation y = m x + b
    else:
        returns m
    print_results tells you some information about what fraction of the
    variance is accounted for
    ignore_nans will remove NAN values from BOTH arrays before computing
    Parameters
    ----------
    data1,data2 : np.ndarray
        Vectors of the same length indicating the 'x' and 'y' vectors to fit
    data1err,data2err : np.ndarray or None
        Vectors of the same length as data1,data2 holding the 1-sigma error values
    """
    if ignore_nans:
        badvals = numpy.isnan(data1) | numpy.isnan(data2)
        if data1err is not None:
            badvals |= numpy.isnan(data1err)
        if data2err is not None:
            badvals |= numpy.isnan(data2err)
        goodvals = ~badvals
        if goodvals.sum() < 2:
            if intercept:
                return 0, 0
            else:
                return 0
        if badvals.sum():
            data1 = data1[goodvals]
            data2 = data2[goodvals]
    else:
        # so the error arrays can be indexed uniformly below
        goodvals = slice(None)

    if intercept:
        dm1 = data1.mean()
        dm2 = data2.mean()
    else:
        dm1, dm2 = 0, 0
    arr = numpy.array([data1 - dm1, data2 - dm2]).T
    U, S, V = numpy.linalg.svd(arr, full_matrices=False)
    # V is sorted by decreasing singular value, so the last row is the
    # direction of smallest variance.  This should be equivalent to
    # V[1,0] / -V[1,1]; see
    # http://stackoverflow.com/questions/5879986/pseudo-inverse-of-sparse-matrix-in-python
    M = V[-1, 0] / -V[-1, -1]
    varfrac = S[0] / S.sum() * 100
    if varfrac < 50:
        raise ValueError("ERROR: SVD/TLS Linear Fit accounts for less than "
                         "half the variance; this is impossible by definition.")
    # this is performed after so that TLS gives a "guess"
    if data1err is not None or data2err is not None:
        try:
            from scipy.odr import RealData, Model, ODR
        except ImportError:
            raise ImportError("Could not import scipy; cannot run Total Least Squares")

        def linmodel(B, x):
            if intercept:
                return B[0] * x + B[1]
            else:
                return B[0] * x

        if data1err is not None:
            data1err = data1err[goodvals]
            data1err[data1err <= 0] = inf
        if data2err is not None:
            data2err = data2err[goodvals]
            data2err[data2err <= 0] = inf
        # only compare the shapes of the arrays that were actually supplied
        if any([data1.shape != other.shape
                for other in (data2, data1err, data2err) if other is not None]):
            raise ValueError("Data shapes do not match")
        linear = Model(linmodel)
        data = RealData(data1, data2, sx=data1err, sy=data2err)
        B = data2.mean() - M * data1.mean()
        beta0 = [M, B] if intercept else [M]
        myodr = ODR(data, linear, beta0=beta0)
        output = myodr.run()
        if print_results:
            output.pprint()
        if return_error:
            return numpy.concatenate([output.beta, output.sd_beta])
        else:
            return output.beta
    if intercept:
        B = data2.mean() - M * data1.mean()
        if print_results:
            print("TLS Best fit y = %g x + %g" % (M, B))
            print("The fit accounts for %0.3g%% of the variance." % (varfrac))
            print("Chi^2 = %g, N = %i" % (((data2 - (data1 * M + B)) ** 2).sum(),
                                          data1.shape[0] - 2))
        return M, B
    else:
        if print_results:
            print("TLS Best fit y = %g x" % (M))
            print("The fit accounts for %0.3g%% of the variance." % (varfrac))
            print("Chi^2 = %g, N = %i" % (((data2 - (data1 * M)) ** 2).sum(),
                                          data1.shape[0] - 1))
        return M
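# Usage sketch (illustrative; synthetic data): the plain SVD path returns
# (m, b); supplying per-point errors switches to the scipy.odr branch, which
# returns the fitted beta vector instead.
def _demo_total_least_squares():
    x = numpy.linspace(0, 100, 100)
    y = 2.5 * x + 1.0 + numpy.random.randn(100)
    m, b = total_least_squares(x, y)          # expect m ~ 2.5, b ~ 1.0
    xerr = numpy.ones(100)
    yerr = numpy.ones(100)
    beta = total_least_squares(x, y, data1err=xerr, data2err=yerr)  # ODR path
    return (m, b), beta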
def smooth_waterfall(arr, fwhm=4.0, unsharp=False):
    """
    Smooth a waterfall plot along the time axis.
    If unsharp is set, remove the smoothed component instead
    Input array should have dimensions [timelen, nbolos]
    """
    timelen, nbolos = arr.shape
    # Gaussian kernel with the requested FWHM: sigma = fwhm / sqrt(8 ln 2)
    # (the exponent must use 2*sigma**2, not 2*sigma)
    sigma = fwhm / numpy.sqrt(8 * numpy.log(2))
    kernel = numpy.exp(-numpy.linspace(-timelen / 2, timelen / 2, timelen) ** 2 /
                       (2.0 * sigma ** 2))
    kernel /= kernel.sum()
    kernelfft = numpy.fft.fft(kernel)
    arrfft = numpy.fft.fft(arr, axis=0)
    arrconv = numpy.fft.fftshift(
        numpy.fft.ifft(arrfft *
                       numpy.outer(kernelfft, numpy.ones(nbolos)),
                       axis=0).real, axes=(0,))
    if unsharp:
        return arr - arrconv
    else:
        return arrconv


def pca_subtract(arr, ncomps):
    """
    Compute the eigenfunctions and values of correlated data, then subtract off
    the *ncomps* most correlated components, transform back to the original
    space, and return that.
    """
    if hasattr(arr, 'filled'):
        arr = arr.filled(0)
    covmat = numpy.dot(arr.T, arr)
    # eigh (symmetric solver) plus a descending sort guarantees that the
    # first columns really are the most correlated components; plain eig
    # does not promise any eigenvalue ordering
    evals, evects = numpy.linalg.eigh(covmat)
    inds = numpy.argsort(evals)[::-1]
    evects = evects[:, inds]
    efuncarr = numpy.dot(arr, evects)
    efuncarr[:, 0:ncomps] = 0
    return numpy.inner(efuncarr, evects)


def unpca_subtract(arr, ncomps):
    """
    Like pca_subtract, except `keep` the *ncomps* most correlated components
    and reject the others
    """
    if hasattr(arr, 'filled'):
        arr = arr.filled(0)
    covmat = numpy.dot(arr.T, arr)
    evals, evects = numpy.linalg.eigh(covmat)
    inds = numpy.argsort(evals)[::-1]
    evects = evects[:, inds]
    efuncarr = numpy.dot(arr, evects)
    efuncarr[:, ncomps:] = 0
    return numpy.inner(efuncarr, evects)
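# Usage sketch (illustrative; synthetic data): pca_subtract and unpca_subtract
# split an array into complementary pieces, so their sum reproduces the input
# (the eigenvector basis is orthonormal).  smooth_waterfall low-pass filters
# along the time axis.
def _demo_pca_split():
    arr = numpy.random.randn(256, 8)
    cleaned = pca_subtract(arr, 2)   # strongest 2 components removed
    kept = unpca_subtract(arr, 2)    # only the strongest 2 components
    assert numpy.allclose(cleaned + kept, arr)
    smoothed = smooth_waterfall(arr, fwhm=8.0)
    return cleaned, kept, smoothed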
def pymc_linear_fit(data1, data2, data1err=None, data2err=None,
                    print_results=False, intercept=True, nsample=5000,
                    burn=1000, thin=10, return_MC=False, guess=None,
                    ignore_nans=True, progress_bar=True):
    """
    MCMC linear fit with optional errors on both axes (uses the legacy
    PyMC 2.x API).
    """
    old_errsettings = numpy.geterr()
    import pymc  # pymc breaks np error settings
    numpy.seterr(**old_errsettings)
    if ignore_nans:
        badvals = numpy.isnan(data1) | numpy.isnan(data2)
        if data1err is not None:
            badvals |= numpy.isnan(data1err)
        if data2err is not None:
            badvals |= numpy.isnan(data2err)
        goodvals = ~badvals
        if goodvals.sum() < 2:
            if intercept:
                return 0, 0
            else:
                return 0
        if badvals.sum():
            data1 = data1[goodvals]
            data2 = data2[goodvals]
            if data1err is not None:
                data1err = data1err[goodvals]
            if data2err is not None:
                data2err = data2err[goodvals]
    if guess is None:
        guess = (0, 0)
    xmu = pymc.distributions.Uninformative(name='x_observed', value=data1)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                value=1.0 / data1err ** 2, observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=xtau, trace=False)
    d = {'slope': pymc.distributions.Uninformative(name='slope',
                                                   value=guess[0])}
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                                                          value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], intercept=d['intercept']):
            return x * slope + intercept
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope']):
            return x * slope
    d['f'] = model
    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau',
                value=1.0 / data2err ** 2, observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=ytau, trace=False)
    d['y'] = ydata

    MC = pymc.MCMC(d)
    MC.sample(nsample, burn=burn, thin=thin, progress_bar=progress_bar)
    MCs = MC.stats()
    m, em = MCs['slope']['mean'], MCs['slope']['standard deviation']
    if intercept:
        b, eb = MCs['intercept']['mean'], MCs['intercept']['standard deviation']
    if print_results:
        print("MCMC Best fit y = %g x" % (m), end='')
        if intercept:
            print(" + %g" % (b))
        else:
            print("")
        print("m = %g +/- %g" % (m, em))
        if intercept:
            print("b = %g +/- %g" % (b, eb))
            print("Chi^2 = %g, N = %i" % (((data2 - (data1 * m + b)) ** 2).sum(),
                                          data1.shape[0] - 2))
        else:
            print("Chi^2 = %g, N = %i" % (((data2 - (data1 * m)) ** 2).sum(),
                                          data1.shape[0] - 1))
    if return_MC:
        return MC
    if intercept:
        return m, b
    else:
        return m
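# Usage sketch (illustrative; requires the legacy pymc 2.x package to be
# installed): fit a noisy synthetic line and read off the posterior means.
def _demo_pymc_linear_fit():
    x = numpy.linspace(0, 100, 100)
    y = 2.5 * x + 1.0 + numpy.random.randn(100)
    m, b = pymc_linear_fit(x, y, nsample=2000, burn=500, progress_bar=False)
    return m, b  # expect m ~ 2.5, b ~ 1.0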
if __name__ == "__main__":
    from pylab import *

    md, bd = {}, {}
    xvals = numpy.linspace(0, 100, 100)
    yvals = numpy.linspace(0, 100, 100)

    md['ideal'], bd['ideal'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['ideal']['poly'], bd['ideal']['poly'] = polyfit(xvals, yvals, 1)
    md['ideal']['PCA'], bd['ideal']['PCA'] = PCA_linear_fit(xvals, yvals, print_results=True)
    md['ideal']['TLS'], bd['ideal']['TLS'] = total_least_squares(xvals, yvals, print_results=True)
    md['ideal']['pymc'], bd['ideal']['pymc'] = pymc_linear_fit(xvals, yvals, print_results=True)

    md['neg'], bd['neg'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                            {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['neg']['poly'], bd['neg']['poly'] = polyfit(xvals, yvals * -1, 1)
    md['neg']['PCA'], bd['neg']['PCA'] = PCA_linear_fit(xvals, yvals * -1, print_results=True)
    md['neg']['TLS'], bd['neg']['TLS'] = total_least_squares(xvals, yvals * -1, print_results=True)
    md['neg']['pymc'], bd['neg']['pymc'] = pymc_linear_fit(xvals, yvals * -1, print_results=True)

    md['intercept'], bd['intercept'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                        {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['intercept']['poly'], bd['intercept']['poly'] = polyfit(xvals, yvals + 1, 1)
    md['intercept']['PCA'], bd['intercept']['PCA'] = PCA_linear_fit(xvals, yvals + 1, print_results=True)
    md['intercept']['TLS'], bd['intercept']['TLS'] = total_least_squares(xvals, yvals + 1, print_results=True)
    md['intercept']['pymc'], bd['intercept']['pymc'] = pymc_linear_fit(xvals, yvals + 1, print_results=True)

    md['noise'], bd['noise'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['noise']['poly'], bd['noise']['poly'] = polyfit(xvals, yvals + random(100), 1)
    md['noise']['PCA'], bd['noise']['PCA'] = PCA_linear_fit(xvals, yvals + random(100), print_results=True)
    md['noise']['TLS'], bd['noise']['TLS'] = total_least_squares(xvals, yvals + random(100), print_results=True)
    md['noise']['pymc'], bd['noise']['pymc'] = pymc_linear_fit(xvals, yvals + random(100), print_results=True)

    md['highnoise'], bd['highnoise'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                        {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['highnoise']['poly'], bd['highnoise']['poly'] = polyfit(xvals, yvals + random(100) * 50, 1)
    md['highnoise']['PCA'], bd['highnoise']['PCA'] = PCA_linear_fit(xvals, yvals + random(100) * 50, print_results=True)
    md['highnoise']['TLS'], bd['highnoise']['TLS'] = total_least_squares(xvals, yvals + random(100) * 50, print_results=True)
    md['highnoise']['pymc'], bd['highnoise']['pymc'] = pymc_linear_fit(xvals, yvals + random(100) * 50, print_results=True)

    md['random'], bd['random'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                  {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    xr, yr = random(100), random(100)
    md['random']['poly'], bd['random']['poly'] = polyfit(xr, yr, 1)
    md['random']['PCA'], bd['random']['PCA'] = PCA_linear_fit(xr, yr, print_results=True)
    md['random']['TLS'], bd['random']['TLS'] = total_least_squares(xr, yr, print_results=True)
    md['random']['pymc'], bd['random']['pymc'] = pymc_linear_fit(xr, yr, print_results=True)

    md['xnoise'], bd['xnoise'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                  {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['xnoise']['poly'], bd['xnoise']['poly'] = polyfit(xvals + random(100) * 5, yvals + random(100) * 5, 1)
    md['xnoise']['PCA'], bd['xnoise']['PCA'] = PCA_linear_fit(xvals + random(100) * 5, yvals + random(100) * 5, print_results=True)
    md['xnoise']['TLS'], bd['xnoise']['TLS'] = total_least_squares(xvals + random(100) * 5, yvals + random(100) * 5, print_results=True)
    md['xnoise']['pymc'], bd['xnoise']['pymc'] = pymc_linear_fit(xvals + random(100) * 5, yvals + random(100) * 5, print_results=True)

    md['xhighnoise'], bd['xhighnoise'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                          {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['xhighnoise']['poly'], bd['xhighnoise']['poly'] = polyfit(xvals + random(100) * 50, yvals + random(100) * 50, 1)
    md['xhighnoise']['PCA'], bd['xhighnoise']['PCA'] = PCA_linear_fit(xvals + random(100) * 50, yvals + random(100) * 50, print_results=True)
    md['xhighnoise']['TLS'], bd['xhighnoise']['TLS'] = total_least_squares(xvals + random(100) * 50, yvals + random(100) * 50, print_results=True)
    md['xhighnoise']['pymc'], bd['xhighnoise']['pymc'] = pymc_linear_fit(xvals + random(100) * 50, yvals + random(100) * 50, print_results=True)

    md['witherrors'], bd['witherrors'] = ({'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0},
                                          {'PCA': 0, 'TLS': 0, 'poly': 0, 'pymc': 0})
    md['witherrors']['poly'], bd['witherrors']['poly'] = 0, 0
    md['witherrors']['PCA'], bd['witherrors']['PCA'] = 0, 0
    xerr, yerr = (randn(100) / 10.0 + 1.0 + sqrt(xvals),
                  randn(100) / 10.0 + 1.0 + sqrt(yvals))
    x, y = xvals + xerr, yvals + yerr
    md['witherrors']['TLS'], bd['witherrors']['TLS'] = total_least_squares(
        x, y, data1err=xerr, data2err=yerr, print_results=True)
    md['witherrors']['pymc'], bd['witherrors']['pymc'] = pymc_linear_fit(
        x, y, data1err=xerr, data2err=yerr, print_results=True)

    print("Slopes: ")
    toprow = (" " * 20) + " ".join(["%20s" % k for k in md['ideal']])
    print(toprow)
    for colname, column in md.items():
        print("%20s" % colname, end=' ')
        for rowname, row in column.items():
            print("%20s" % row, end=' ')
        print()

    print("Intercepts: ")
    toprow = (" " * 20) + " ".join(["%20s" % k for k in bd['ideal']])
    print(toprow)
    for colname, column in bd.items():
        print("%20s" % colname, end=' ')
        for rowname, row in column.items():
            print("%20s" % row, end=' ')
        print()

    print("PyMC linear tests")
    MC1 = pymc_linear_fit(x, y, intercept=False, print_results=True, return_MC=True)
    MC2 = pymc_linear_fit(x, y, xerr, yerr, intercept=False, print_results=True, return_MC=True)

    # 20-point test table; judging by the name and the way the columns are
    # used, this is the Hogg, Bovy & Lang (2010)-style data set with columns
    # ID, x, y, sigma_y, sigma_x, rho_xy
    hoggdata = numpy.array([
        [1, 201, 592, 61, 9, -0.84],
        [2, 244, 401, 25, 4, 0.31],
        [3, 47, 583, 38, 11, 0.64],
        [4, 287, 402, 15, 7, -0.27],
        [5, 203, 495, 21, 5, -0.33],
        [6, 58, 173, 15, 9, 0.67],
        [7, 210, 479, 27, 4, -0.02],
        [8, 202, 504, 14, 4, -0.05],
        [9, 198, 510, 30, 11, -0.84],
        [10, 158, 416, 16, 7, -0.69],
        [11, 165, 393, 14, 5, 0.30],
        [12, 201, 442, 25, 5, -0.46],
        [13, 157, 317, 52, 5, -0.03],
        [14, 131, 311, 16, 6, 0.50],
        [15, 166, 400, 34, 6, 0.73],
        [16, 160, 337, 31, 5, -0.52],
        [17, 186, 423, 42, 9, 0.90],
        [18, 125, 334, 26, 8, 0.40],
        [19, 218, 533, 16, 6, -0.78],
        [20, 146, 344, 22, 5, -0.56],
    ])
    linear_fitters = [total_least_squares, PCA_linear_fit, pymc_linear_fit]
    for method in linear_fitters:
        print(method.__name__, method(hoggdata[:, 1], hoggdata[:, 2]))
        try:
            print(method.__name__, method(hoggdata[:, 1], hoggdata[:, 2],
                                          data2err=hoggdata[:, 3]))
        except TypeError:
            pass
        except AttributeError:
            pass
        try:
            print(method.__name__, method(hoggdata[:, 1], hoggdata[:, 2],
                                          data2err=hoggdata[:, 3],
                                          data1err=hoggdata[:, 4]))
        except TypeError:
...
print_results.py
Source:print_results.py
...
#       below by the function definition of the print_results function.
#       Notice that this function doesn't return anything because it
#       prints a summary of the results using results_dic and results_stats_dic
#
def print_results(results_dic, results_stats_dic, model,
                  print_incorrect_dogs=False, print_incorrect_breed=False):
    """
    Prints summary results on the classification and then prints incorrectly
    classified dogs and incorrectly classified dog breeds if the user indicates
    they want those printouts (use non-default values)
    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index) idx 0 = pet image label (string)
                     idx 1 = classifier label (string)
                     idx 2 = 1/0 (int)  where 1 = match between pet image and
                             classifier labels and 0 = no match between labels
                     idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                             0 = pet image 'is-NOT-a' dog.
                     idx 4 = 1/0 (int)  where 1 = Classifier classifies image
...
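For reference, here is a minimal sketch of one results_dic entry matching the index layout documented above. The filename and labels are invented, and the meaning of idx 4 is inferred from the truncated docstring:

# Hypothetical results_dic entry (illustrative values only):
results_dic = {
    "Beagle_01141.jpg": [
        "beagle",   # idx 0: pet image label
        "beagle",   # idx 1: classifier label
        1,          # idx 2: 1 = pet image and classifier labels match
        1,          # idx 3: 1 = pet image is a dog
        1,          # idx 4: 1 = classifier classifies image as a dog (inferred)
    ],
}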
