Best Python code snippet using locust
cross_val.py
Source:cross_val.py  
"""
Utilities for cross validation.

taken from scikits.learn

# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>,
#         Gael Varoquaux    <gael.varoquaux@normalesup.org>
# License: BSD Style.
# $Id$

changes to code by josef-pktd:
 - docstring formatting: underlines of headers

"""
try:
    # Python 2/3 compatibility shims.  They are optional: on Python 3 the
    # builtin ``range`` already behaves as required.
    from statsmodels.compat.python import range, lrange
except ImportError:
    def lrange(*args):
        """Return ``range(*args)`` materialised as a list."""
        return list(range(*args))

import numpy as np
from itertools import combinations


################################################################################
class LeaveOneOut(object):
    """
    Leave-One-Out cross validation iterator:
    Provides train/test indexes to split data in train test sets
    """

    def __init__(self, n):
        """
        Leave-One-Out cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4]]
        >>> y = [1, 2]
        >>> loo = cross_val.LeaveOneOut(2)
        >>> for train_index, test_index in loo:
        ...    print("TRAIN:", train_index, "TEST:", test_index)
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        ...    print(X_train, X_test, y_train, y_test)
        TRAIN: [False  True] TEST: [ True False]
        [[3 4]] [[1 2]] [2] [1]
        TRAIN: [ True False] TEST: [False  True]
        [[1 2]] [[3 4]] [1] [2]
        """
        self.n = n

    def __iter__(self):
        """Yield ``(train_index, test_index)`` boolean masks of length n."""
        n = self.n
        for i in range(n):
            # Builtin ``bool``: the ``np.bool`` alias no longer exists in
            # current NumPy releases.
            test_index = np.zeros(n, dtype=bool)
            test_index[i] = True
            train_index = np.logical_not(test_index)
            yield train_index, test_index

    def __repr__(self):
        return '%s.%s(n=%i)' % (self.__class__.__module__,
                                self.__class__.__name__,
                                self.n,
                                )


################################################################################
class LeavePOut(object):
    """
    Leave-P-Out cross validation iterator:
    Provides train/test indexes to split data in train test sets
    """

    def __init__(self, n, p):
        """
        Leave-P-Out cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        p: int
            Size test sets

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
        >>> y = [1, 2, 3, 4]
        >>> lpo = cross_val.LeavePOut(4, 2)
        >>> for train_index, test_index in lpo:
        ...    print("TRAIN:", train_index, "TEST:", test_index)
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        TRAIN: [False  True False  True] TEST: [ True False  True False]
        TRAIN: [False  True  True False] TEST: [ True False False  True]
        TRAIN: [ True False False  True] TEST: [False  True  True False]
        TRAIN: [ True False  True False] TEST: [False  True False  True]
        TRAIN: [ True  True False False] TEST: [False False  True  True]
        """
        self.n = n
        self.p = p

    def __iter__(self):
        """Yield one ``(train_index, test_index)`` mask pair per p-subset."""
        n = self.n
        p = self.p
        # ``combinations`` accepts any iterable, so no list copy is needed.
        for idx in combinations(range(n), p):
            test_index = np.zeros(n, dtype=bool)
            # Explicit integer dtype: an empty tuple (p == 0) would otherwise
            # become a float array, which is not a valid index.
            test_index[np.array(idx, dtype=int)] = True
            train_index = np.logical_not(test_index)
            yield train_index, test_index

    def __repr__(self):
        return '%s.%s(n=%i, p=%i)' % (
                                self.__class__.__module__,
                                self.__class__.__name__,
                                self.n,
                                self.p,
                                )


################################################################################
class KFold(object):
    """
    K-Folds cross validation iterator:
    Provides train/test indexes to split data in train test sets
    """

    def __init__(self, n, k):
        """
        K-Folds cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        k: int
            number of folds

        Raises
        ------
        ValueError
            If ``k`` is not positive or is not smaller than ``n``.

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [1, 2], [3, 4]]
        >>> y = [1, 2, 3, 4]
        >>> kf = cross_val.KFold(4, k=2)
        >>> for train_index, test_index in kf:
        ...    print("TRAIN:", train_index, "TEST:", test_index)
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        TRAIN: [ True  True False False] TEST: [False False  True  True]

        Notes
        -----
        All the folds have size trunc(n/k), the last one has the complementary
        """
        # Raise explicitly: ``assert cond, ValueError(...)`` produced an
        # AssertionError and vanished entirely under ``python -O``.
        if not k > 0:
            raise ValueError('cannot have k below 1')
        if not k < n:
            raise ValueError('cannot have k=%d greater than %d' % (k, n))
        self.n = n
        self.k = k

    def __iter__(self):
        """Yield ``(train_index, test_index)`` masks for k contiguous folds."""
        n = self.n
        k = self.k
        # Floor division gives the documented fold size trunc(n/k).  Rounding
        # up instead can leave the last fold empty (e.g. n=7, k=5).
        j = n // k
        for i in range(k):
            test_index = np.zeros(n, dtype=bool)
            if i < k - 1:
                test_index[i * j:(i + 1) * j] = True
            else:
                # The last fold takes every remaining element.
                test_index[i * j:] = True
            train_index = np.logical_not(test_index)
            yield train_index, test_index

    def __repr__(self):
        return '%s.%s(n=%i, k=%i)' % (
                                self.__class__.__module__,
                                self.__class__.__name__,
                                self.n,
                                self.k,
                                )


################################################################################
class LeaveOneLabelOut(object):
    """
    Leave-One-Label_Out cross-validation iterator:
    Provides train/test indexes to split data in train test sets
    """

    def __init__(self, labels):
        """
        Leave-One-Label_Out cross validation:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        labels : list
                List of labels

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
        >>> y = [1, 2, 1, 2]
        >>> labels = [1, 1, 2, 2]
        >>> lol = cross_val.LeaveOneLabelOut(labels)
        >>> for train_index, test_index in lol:
        ...    print("TRAIN:", train_index, "TEST:", test_index)
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, \
            test_index, X, y)
        ...    print(X_train, X_test, y_train, y_test)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        [[5 6]
        [7 8]] [[1 2]
        [3 4]] [1 2] [1 2]
        TRAIN: [ True  True False False] TEST: [False False  True  True]
        [[1 2]
        [3 4]] [[5 6]
        [7 8]] [1 2] [1 2]
        """
        self.labels = labels

    def __iter__(self):
        """Yield one ``(train_index, test_index)`` mask pair per unique label."""
        # We make a copy here to avoid side-effects during iteration
        labels = np.array(self.labels, copy=True)
        for i in np.unique(labels):
            test_index = np.zeros(len(labels), dtype=bool)
            test_index[labels == i] = True
            train_index = np.logical_not(test_index)
            yield train_index, test_index

    def __repr__(self):
        return '%s.%s(labels=%s)' % (
                                self.__class__.__module__,
                                self.__class__.__name__,
                                self.labels,
                                )


def split(train_indexes, test_indexes, *args):
    """
    For each arg return a train and test subsets defined by indexes provided
    in train_indexes and test_indexes

    Returns
    -------
    list
        ``[arg0_train, arg0_test, arg1_train, arg1_test, ...]``; each ``arg``
        is converted with ``np.asanyarray`` before being indexed.
    """
    ret = []
    for arg in args:
        arg = np.asanyarray(arg)
        ret.append(arg[train_indexes])
        ret.append(arg[test_indexes])
    return ret


'''
 >>> cv = cross_val.LeaveOneLabelOut(X, y) # y making y optional and
possible to add other arrays of the same shape[0] too
 >>> for X_train, y_train, X_test, y_test in cv:
 ...      print np.sqrt((model.fit(X_train, y_train).predict(X_test)
- y_test) ** 2).mean())
'''


################################################################################
#below: Author: josef-pktd
class KStepAhead(object):
    """
    KStepAhead cross validation iterator:
    Provides fit/test indexes to split data in sequential sets
    """

    def __init__(self, n, k=1, start=None, kall=True, return_slice=True):
        """
        KStepAhead cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        k : int
            number of steps ahead
        start : int
            initial size of data for fitting; defaults to ``int(0.25 * n)``
        kall : boolean
            if true. all values for up to k-step ahead are included in the test index.
            If false, then only the k-th step ahead value is returned
        return_slice : boolean
            if true, ``slice`` objects are yielded; if false, boolean masks of
            length n are yielded, as the other iterators in this module do.

        Notes
        -----
        I don't think this is really useful, because it can be done with
        a very simple loop instead.
        Useful as a plugin, but it could return slices instead for faster array access.

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> ksa = cross_val.KStepAhead(5, k=2, start=2)
        >>> for train_index, test_index in ksa:
        ...    print("TRAIN:", train_index, "TEST:", test_index)
        TRAIN: slice(None, 2, None) TEST: slice(2, 4, None)
        """
        self.n = n
        self.k = k
        if start is None:
            start = int(np.trunc(n*0.25)) # pick something arbitrary
        self.start = start
        self.kall = kall
        self.return_slice = return_slice

    def __iter__(self):
        """Yield ``(train, test)`` for each split point from start to n-k-1."""
        n = self.n
        k = self.k
        start = self.start
        # NOTE(review): the loop stops at i = n-k-1, so the window whose test
        # set would end exactly at element n-1 is never produced - confirm
        # whether that is intended.
        if self.return_slice:
            for i in range(start, n-k):
                train_slice = slice(None, i, None)
                if self.kall:
                    test_slice = slice(i, i+k)
                else:
                    test_slice = slice(i+k-1, i+k)
                yield train_slice, test_slice
        else: #for compatibility with other iterators
            for i in range(start, n-k):
                train_index = np.zeros(n, dtype=bool)
                train_index[:i] = True
                test_index = np.zeros(n, dtype=bool)
                if self.kall:
                    test_index[i:i+k] = True # np.logical_not(test_index)
                else:
                    test_index[i+k-1:i+k] = True
                #or faster to return np.arange(i,i+k) ?
                #returning slice should be faster in this case
                yield train_index, test_index

    def __repr__(self):
        return '%s.%s(n=%i)' % (self.__class__.__module__,
                                self.__class__.__name__,
                                self.n,
                                )
Source:entrenar.py  
"""Train spam/ham classifiers with K-fold cross validation and pickle them.

For every enabled entry of ``CLASSIFIERS`` the estimator is fitted on each
fold, scored with the F-0.5 score on the held-out fold, and the best-scoring
fitted model is written to disk with ``pickle``.
"""
from utilities import *
from scipy.sparse import coo_matrix, hstack
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import ensemble
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import fbeta_score, make_scorer
import pickle


def train_and_pickle(label, estimator, X, y, scorer, n_folds, out_path):
    """Cross-validate ``estimator`` and pickle the best-scoring fitted copy.

    Parameters
    ----------
    label : str
        Name printed before training starts.
    estimator : scikit-learn estimator
        Unfitted template; a fresh clone is fitted on every fold.
    X, y : array-like
        Feature matrix and targets, indexed positionally by the fold indexes.
    scorer : callable
        Called as ``scorer(fitted_estimator, X_test, y_test)``.
    n_folds : int
        Number of shuffled folds.
    out_path : str
        File the best fitted estimator is pickled to.

    Returns
    -------
    tuple
        ``(best_estimator, best_score)``.
    """
    print(label)
    # Size the folds from the data instead of a hard-coded sample count.
    kf = KFold(X.shape[0], n_folds=n_folds, shuffle=True)
    best_score = float('-inf')
    best_clf = None
    for train_index, test_index in kf:
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Fit a clone per fold: refitting one shared object would overwrite
        # the model previously remembered as "best".
        clf = clone(estimator)
        clf.fit(X_train, y_train)
        score = scorer(clf, X_test, y_test)
        if score > best_score:
            best_clf = clf
            best_score = score
    # Pickle output is binary; text mode corrupts it on some platforms and
    # fails outright on Python 3.
    with open(out_path, 'wb') as fout:
        pickle.dump(best_clf, fout)
    return best_clf, best_score


# Read the mails (set the correct paths).
# NOTE(review): ``json`` and ``pd`` are presumably re-exported by
# ``from utilities import *`` - confirm.
with open('../../dataset_json/ham_dev.json') as ham_file:
    ham_txt = json.load(ham_file)
with open('../../dataset_json/spam_dev.json') as spam_file:
    spam_txt = json.load(spam_file)

# Lower-case every mail.  Lists (not ``map`` objects) so that concatenation
# and ``len`` below work on Python 2 and 3 alike.
ham_txt = [mail.lower() for mail in ham_txt]
spam_txt = [mail.lower() for mail in spam_txt]

# Build the data frame.
df = pd.DataFrame(ham_txt + spam_txt, columns=['text'])
# 0 = ham, 1 = spam (binary labels are required for the f_0.5 score)
df['class'] = [0] * len(ham_txt) + [1] * len(spam_txt)
print("Lei json y arme data frame")

# Extract simple attributes.
# Feature: whether the mail contains HTML.
HTML = coo_matrix([hasHTML(text) for text in df.text]).transpose()
# Feature: whether the mail has a subject.
SUBJ = coo_matrix([hasSubject(text) for text in df.text]).transpose()
# Length of the mail.
LEN = coo_matrix([len(text) for text in df.text]).transpose()
# Number of spaces in the mail.
SPACES = coo_matrix([count_spaces(text) for text in df.text]).transpose()
print("Clasifique por atributos simples")

vectorizer = obtenerVectorizer()
word_freq_matrix = vectorizer.transform(df.text)
print("Arme matriz")

X = hstack([HTML, SUBJ, LEN, SPACES, word_freq_matrix]).toarray()
# Plain array so the fold indexes are applied positionally.
y = df['class'].values

# F-beta score with beta=0.5 (precision is favoured over recall).
f05_scorer = make_scorer(fbeta_score, beta=0.5)
print("Defino clasificadores")

# (label, estimator, n_folds, pickle path, enabled)
# Only the SVM is currently trained; flip ``enabled`` to run the others.
CLASSIFIERS = [
    ("Decision Tree",
     DecisionTreeClassifier(max_features=None, max_leaf_nodes=100,
                            min_samples_split=2, criterion='gini'),
     10, 'dectree.pickle', False),
    ("Random Forest",
     ensemble.RandomForestClassifier(max_features=0.5, max_leaf_nodes=None,
                                     min_samples_split=4, criterion='gini',
                                     n_estimators=20, n_jobs=4),
     10, 'ranfor.pickle', False),
    ("SVM",
     svm.SVC(kernel='rbf', C=1, gamma=1.0),
     3, 'svm.pickle', True),
    ("Gaussian NB",
     GaussianNB(),
     10, 'gaussianNB.pickle', False),
    ("Multinomial NB",
     MultinomialNB(alpha=0.25, fit_prior=False),
     10, 'multinomialNB.pickle', False),
    ("Bernoulli NB",
     BernoulliNB(binarize=0.0, alpha=0.25, fit_prior=False),
     10, 'bernoulliNB.pickle', False),
    ("KNN",
     KNeighborsClassifier(n_neighbors=1, weights='uniform', leaf_size=15,
                          algorithm='kd_tree'),
     10, 'knn.pickle', False),
]

for label, estimator, n_folds, out_path, enabled in CLASSIFIERS:
    if enabled:
        best_clf, best_score = train_and_pickle(
            label, estimator, X, y, f05_scorer, n_folds, out_path)
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!
