Best Python code snippet using tempest_python
app_kfold_sklearn.py
Source:app_kfold_sklearn.py  
# -*- coding: utf-8 -*-
"""Stratified k-fold evaluation of scikit-learn text classifiers on tweets.

For every combination of vectorizer, classifier and preprocessing algorithm
declared below, the training tweets are preprocessed, split into ``KFOLD``
stratified folds, vectorized, fitted and scored on the train, validation and
test sets.  The per-fold scores are accumulated in ``LOG_DICT`` and handed to
``log_classifier`` after each fold.

NOTE(review): the source listing this script was recovered from is truncated
after the ``log_classifier`` call; anything that followed is not shown here.
"""
from tweets import tweets_preprocessor
from models import Sklearn
from sklearn.model_selection import cross_val_score
from utils import log, save_classifier, log_classifier
from data import train_data, test_data
from sklearn.model_selection import StratifiedKFold
import uuid

########################################################################################################################

# Preprocessing option sets, keyed by a stable UUID so that logged results can
# be traced back to the exact options that produced them.
PREPROCESSING_ALGORITHMS = {
    '1258a9d2-111e-4d4a-acda-852dd7ba3e88': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '60314ef9-271d-4865-a7db-6889b1670f59': {
        'add_link_flag': False, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '4c2e484d-5cb8-4e3e-ba7b-679ae7a73fca': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': True, 'add_location_flag': True, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '8b7db91c-c8bf-40f2-986a-83a659b63ba6': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '7bc816a1-25df-4649-8570-0012d0acd72a': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    'a85c8435-6f23-4015-9e8c-19547222d6ce': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    'b054e509-4f04-44f2-bcf9-14fa8af4eeed': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    '2e359f0b-bfb9-4eda-b2a4-cd839c122de6': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    '71bd09db-e104-462d-887a-74389438bb49': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    'd3cc3c6e-10de-4b27-8712-8017da428e41': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True}
}

# Vectorizer configurations; 'TYPE' is looked up in Sklearn.VECTORIZERS and
# 'OPTIONS' is passed to the selected factory as keyword arguments.
VECTORIZERS = [
    {
        'TYPE': 'TFIDF',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 1)
        }
    },
    {
        'TYPE': 'COUNT',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 1)
        }
    },
    {
        'TYPE': 'TFIDF',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 2)
        }
    },
    {
        'TYPE': 'COUNT',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 2)
        }
    },
    {
        'TYPE': 'TFIDF',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 3)
        }
    },
    {
        'TYPE': 'COUNT',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 3)
        }
    }
]

# Classifier configurations; 'TYPE' is looked up in Sklearn.CLASSIFIERS and
# 'OPTIONS' is passed to the selected factory as keyword arguments.
CLASSIFIERS = [
    {
        'TYPE': 'RIDGE',
        'OPTIONS': {}
    },
    {
        'TYPE': 'LOGISTIC_REGRESSION',
        'OPTIONS': {}
    },
    # {
    #     'TYPE': 'RANDOM_FOREST',
    #     'OPTIONS': {}
    # },
    # {
    #     'TYPE': 'DECISION_TREE',
    #     'OPTIONS': {}
    # },
    {
        'TYPE': 'SVC',
        'OPTIONS': {}
    },
    # {
    #     'TYPE': 'SGD',
    #     'OPTIONS': {}
    # }
]

SEED = 7    # random_state for the fold shuffling, so splits are reproducible
KFOLD = 10  # number of stratified folds

for VECTORIZER in VECTORIZERS:
    for CLASSIFIER in CLASSIFIERS:
        for algorithm_id, preprocessing_algorithm in PREPROCESSING_ALGORITHMS.items():
            # One log record per (vectorizer, classifier, preprocessing) run.
            LOG_DICT = {
                'UUID': str(uuid.uuid4()),
                'PREPROCESSING_ALGORITHM_UUID': algorithm_id,
                'PREPROCESSING_ALGORITHM': preprocessing_algorithm,
                'VECTORIZER': VECTORIZER,
                'CLASSIFIER': CLASSIFIER,
                'KFOLD_HISTORY': []
            }
            kfold = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED)

            train_data['preprocessed'] = tweets_preprocessor.preprocess(
                train_data.text,
                preprocessing_algorithm,
                keywords=train_data.keyword,
                locations=train_data.location
            )
            test_data['preprocessed'] = tweets_preprocessor.preprocess(
                test_data.text,
                preprocessing_algorithm,
                keywords=test_data.keyword,
                locations=test_data.location
            )

            inputs = train_data['preprocessed']
            targets = train_data['target']

            # enumerate() supplies the fold index; the visible original set
            # ``k = 0`` once and never advanced it, so every fold printed 0.
            for k, (train, validation) in enumerate(kfold.split(inputs, targets)):
                # A fresh vectorizer per fold: its vocabulary is fitted on the
                # fold's training part only and reused for validation/test.
                vectorizer = Sklearn.VECTORIZERS[VECTORIZER['TYPE']](**VECTORIZER['OPTIONS'])

                # toarray() yields a plain ndarray.  todense() yields a
                # numpy.matrix, which current scikit-learn estimators reject;
                # that failure used to be swallowed below and logged as 'error'.
                x_train = vectorizer.fit_transform(inputs[train]).toarray()
                y_train = targets[train]

                x_val = vectorizer.transform(inputs[validation]).toarray()
                y_val = targets[validation]

                x_test = vectorizer.transform(test_data.preprocessed).toarray()
                y_test = test_data.target.values

                classifier = Sklearn.CLASSIFIERS[CLASSIFIER['TYPE']](**CLASSIFIER['OPTIONS'])

                try:
                    classifier.fit(x_train, y_train)
                    train_score = round(classifier.score(x_train, y_train), 6)
                    val_score = round(classifier.score(x_val, y_val), 6)
                    test_score = round(classifier.score(x_test, y_test), 6)
                    history = {
                        'train_score': train_score,
                        'val_score': val_score,
                        'test_score': test_score
                    }
                except Exception as exc:
                    # Best effort: a failing fold must not abort the sweep, but
                    # the cause is reported instead of being dropped silently.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print('Fold', k, 'failed:', repr(exc))
                    history = 'error'

                LOG_DICT['KFOLD_HISTORY'].append(history)
                print(CLASSIFIER['TYPE'], VECTORIZER, history, k)
                log_classifier(LOG_DICT)
# ... (the rest of app_kfold_sklearn.py is truncated in the source listing)
# aws_s3_tile_cp.py
Source:aws_s3_tile_cp.py  
# coding=utf-8
"""Copy a pyramid of rendered tiles from one S3 location to another.

Starting at a root tile, every tile down to ``max_zoom`` is queued on a thread
pool; each worker copies the tile once per entry in ``RENDER_COMBINATIONS``.

Environment variables read at import time:

* ``VERBOSE``     - if set, enable debug logging for this script.
* ``CUTOFF_DATE`` - ``yyyy-mm-ddThh:mm:ss`` (UTC); destination tiles that
  already exist and are at least this new are not copied again.
* ``ONLY_COPY``   - comma-separated tile types to copy; others are skipped.
* ``POOL_SIZE``   - number of worker threads (default 12).

NOTE(review): the source listing this file was recovered from is truncated
inside the final ``logger.info`` call; see the note at that call.
"""
import argparse
import botocore
import boto3
import datetime
import hashlib
import logging
import mercantile
import os
import pytz
import time
import threading
from mercantile import Tile
from multiprocessing.dummy import Pool

logging.basicConfig(level=logging.INFO)
# Quieting boto messages down a little
logging.getLogger('boto3.resources.action').setLevel(logging.WARNING)
logging.getLogger('botocore').setLevel(logging.WARNING)
logging.getLogger('marblecutter').setLevel(logging.WARNING)
logging.getLogger('marblecutter.mosaic').setLevel(logging.WARNING)
logging.getLogger('marblecutter.sources').setLevel(logging.WARNING)
logger = logging.getLogger('batchtiler')
if os.environ.get('VERBOSE'):
    logger.setLevel(logging.DEBUG)

THREAD_LOCAL = threading.local()


def initialize_thread():
    """Create the per-thread boto3 session and S3 client (pool initializer)."""
    # Each thread needs its own boto3 Session object - it's not threadsafe
    THREAD_LOCAL.boto_session = boto3.session.Session()
    THREAD_LOCAL.s3_client = THREAD_LOCAL.boto_session.client('s3')


# Don't copy tiles that exist and are newer than this cutoff datetime
# in yyyy-mm-ddThh:mm:ss format (in UTC)
CUTOFF_DATE = pytz.UTC.localize(datetime.datetime.strptime(
    os.environ.get('CUTOFF_DATE'),
    '%Y-%m-%dT%H:%M:%S'
)) if os.environ.get('CUTOFF_DATE') else None

# Only copy these tile types
ONLY_COPY = os.environ.get('ONLY_COPY').split(',') if os.environ.get('ONLY_COPY') else None

# The number of threads in the pool communicating with AWS
POOL_SIZE = int(os.environ.get('POOL_SIZE', '12'))
POOL = Pool(POOL_SIZE, initializer=initialize_thread)

# (tile type, file extension) pairs copied for every tile.
RENDER_COMBINATIONS = [
    ("normal", ".png"),
    ("terrarium", ".png"),
    ("geotiff", ".tif"),
]


def s3_key(key_prefix, tile_type, tile, key_suffix, include_hash):
    """Build the S3 object key for one rendering of ``tile``.

    The base key is ``<tile_type>/<z>/<x>/<y><key_suffix>``.  When
    ``include_hash`` is true, the first six hex digits of the base key's MD5
    digest are prepended as a path component.  A truthy ``key_prefix`` is
    prepended last.
    """
    key = '{}/{}/{}/{}{}'.format(
        tile_type,
        tile.z,
        tile.x,
        tile.y,
        key_suffix,
    )

    if include_hash:
        # hashlib needs bytes: passing the str directly raises TypeError on
        # Python 3.  Encoding an ASCII key leaves the digest unchanged.
        h = hashlib.md5(key.encode('utf-8')).hexdigest()[:6]
        key = '{}/{}'.format(
            h,
            key,
        )

    if key_prefix:
        key = '{}/{}'.format(key_prefix, key)

    return key


def head_object(s3, bucket, key):
    """ Head the given object and return the result if it exists.
    Returns `None` if it doesn't exist. """
    try:
        return s3.head_object(
            Bucket=bucket,
            Key=key,
        )
    except botocore.exceptions.ClientError as e:
        # A 404 means the object does not exist; anything else is re-raised.
        # The code is compared as a string: converting it with int() raised
        # ValueError for non-numeric codes and hid the original ClientError.
        error_code = str(e.response.get('Error', {}).get('Code'))
        if error_code == '404':
            return None
        raise


def _wait_before_retry(reason, tries, from_bucket, from_key,
                       to_bucket, to_key, wait):
    """Log a retryable failure, sleep ``wait`` seconds, return the next wait.

    The wait doubles after every retry and is capped at 30 seconds.
    """
    logger.info(
        "%s received, try %s, while copying "
        "s3://%s/%s to s3://%s/%s, waiting %0.1f sec",
        reason, tries,
        from_bucket, from_key,
        to_bucket, to_key,
        wait,
    )
    time.sleep(wait)
    return min(30.0, wait * 2.0)


def copy_tile(tile, remove_hash, from_s3, to_s3):
    """Copy every rendering of ``tile`` from ``from_s3`` to ``to_s3``.

    ``from_s3`` and ``to_s3`` are ``(bucket, prefix)`` pairs.  When
    ``remove_hash`` is true the source key carries the hash component while
    the destination key never does.  Connection errors, credential retrieval
    errors and S3 ``SlowDown``/``503`` responses are retried without a retry
    limit, using exponential backoff; a missing source object (``NoSuchKey``)
    is logged and skipped; any other client error propagates.
    """
    from_bucket, from_prefix = from_s3
    to_bucket, to_prefix = to_s3

    # Set up by initialize_thread() on the pool's worker threads.
    s3 = THREAD_LOCAL.s3_client

    for tile_type, ext in RENDER_COMBINATIONS:
        from_key = s3_key(from_prefix, tile_type, tile, ext, remove_hash)
        to_key = s3_key(to_prefix, tile_type, tile, ext, False)

        if ONLY_COPY and tile_type not in ONLY_COPY:
            logger.debug(
                'Skipping copy to s3://%s/%s because '
                'type %s not in %s',
                to_bucket, to_key,
                tile_type, ONLY_COPY,
            )
            continue

        tries = 0
        wait = 1.0

        while True:
            try:
                tries += 1

                if CUTOFF_DATE:
                    # Check if the tile that we're copying to already exists
                    # and if its newer than the specified cutoff date
                    obj_head_resp = head_object(s3, to_bucket, to_key)
                    if obj_head_resp and obj_head_resp['LastModified'] >= CUTOFF_DATE:
                        logger.debug(
                            'Skipping copy to s3://%s/%s because '
                            'last modified %s >= %s',
                            to_bucket, to_key,
                            obj_head_resp['LastModified'].isoformat(),
                            CUTOFF_DATE.isoformat(),
                        )
                        # This is a break (instead of a return) so that we
                        # continue with the outer for loop
                        break

                s3.copy_object(
                    Bucket=to_bucket,
                    Key=to_key,
                    CopySource={
                        'Bucket': from_bucket,
                        'Key': from_key,
                    }
                )
                logger.debug(
                    "Copied s3://%s/%s to s3://%s/%s at try %s",
                    from_bucket, from_key,
                    to_bucket, to_key,
                    tries,
                )
                break

            except (botocore.vendored.requests.exceptions.ConnectionError,
                    botocore.exceptions.CredentialRetrievalError) as e:
                # NOTE(review): botocore.vendored.requests is a legacy import
                # path - confirm it still matches the connection errors raised
                # by the botocore version in use.
                wait = _wait_before_retry(
                    e, tries, from_bucket, from_key, to_bucket, to_key, wait)

            except botocore.exceptions.ClientError as e:
                error_code = str(e.response.get('Error', {}).get('Code'))
                if error_code in ('SlowDown', '503'):
                    wait = _wait_before_retry(
                        error_code, tries,
                        from_bucket, from_key, to_bucket, to_key, wait)
                elif error_code == 'NoSuchKey':
                    # Logger.warn is a deprecated alias of Logger.warning.
                    logger.warning(
                        "NoSuchKey received while copying "
                        "s3://%s/%s to s3://%s/%s (skipping copy)",
                        from_bucket, from_key,
                        to_bucket, to_key,
                    )
                    break
                else:
                    raise


def tile_exc_wrapper(tile, remove_hash, from_s3, to_s3):
    """Run ``copy_tile`` and log any exception with its traceback.

    ``queue_render`` never inspects the async result, so without this wrapper
    a failure inside a worker would go unnoticed.
    """
    try:
        copy_tile(tile, remove_hash, from_s3, to_s3)
    except Exception:
        logger.exception('Error while processing tile %s', tile)
        raise


def queue_render(tile, remove_hash, from_s3, to_s3):
    """Schedule the copy of one tile on the thread pool."""
    logger.debug('Enqueueing render for tile %s', tile)
    POOL.apply_async(
        tile_exc_wrapper,
        args=[tile, remove_hash, from_s3, to_s3]
    )


def queue_tile(tile, max_zoom, remove_hash, from_s3, to_s3):
    """Queue ``tile`` and, recursively, all its descendants up to ``max_zoom``."""
    queue_render(tile, remove_hash, from_s3, to_s3)

    if tile.z < max_zoom:
        for child in mercantile.children(tile):
            queue_tile(child, max_zoom, remove_hash, from_s3, to_s3)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('x', type=int)
    parser.add_argument('y', type=int)
    parser.add_argument('zoom', type=int)
    parser.add_argument('max_zoom', type=int)
    parser.add_argument('from_bucket')
    parser.add_argument('to_bucket')
    parser.add_argument('--from_prefix')
    parser.add_argument('--to_prefix')
    parser.add_argument('--remove_hash', dest='remove_hash', action='store_true', default=False)
    args = parser.parse_args()

    root = Tile(args.x, args.y, args.zoom)

    logger.info('Copying tiles from root tile %s to zoom %s at '
                's3://%s/%s to s3://%s/%s',
                root, args.max_zoom, args.from_bucket, args.from_prefix or '',
                args.to_bucket, args.to_prefix or '')
    logger.info('Running %s processes', POOL_SIZE)

    start_time = time.time()
    queue_tile(root, args.max_zoom, args.remove_hash,
               (args.from_bucket, args.from_prefix),
               (args.to_bucket, args.to_prefix))

    # NOTE(review): this counts the tiles of the deepest zoom level only,
    # while queue_tile() also queues every intermediate level - confirm
    # whether an estimate is all that is intended.
    est_copies = (4**(args.max_zoom - args.zoom)) * len(RENDER_COMBINATIONS)

    # Stop accepting work and wait for all queued copies to finish.
    POOL.close()
    POOL.join()
    end_time = time.time()

    # NOTE(review): the argument list of this call is cut off in the source
    # listing; the four arguments below are inferred from the format string
    # and the variables in scope - confirm against the original file.
    logger.info('Done processing pyramid %s to zoom %s (%d ops in %0.1f sec)',
                root, args.max_zoom, est_copies, end_time - start_time)
# ... (the rest of aws_s3_tile_cp.py is truncated in the source listing)
# preprocessing_algorithms.py
Source:preprocessing_algorithms.py  
# Preprocessing option sets, keyed by UUID.  Every set is stored with
# 'join': False; get_preprocessing_algorithm() can switch it on per request.
PREPROCESSING_ALGORITHMS = {
    '1258a9d2-111e-4d4a-acda-852dd7ba3e88': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '60314ef9-271d-4865-a7db-6889b1670f59': {
        'add_link_flag': False, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '4c2e484d-5cb8-4e3e-ba7b-679ae7a73fca': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': True, 'add_location_flag': True, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '8b7db91c-c8bf-40f2-986a-83a659b63ba6': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '7bc816a1-25df-4649-8570-0012d0acd72a': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    'a85c8435-6f23-4015-9e8c-19547222d6ce': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    'b054e509-4f04-44f2-bcf9-14fa8af4eeed': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    '2e359f0b-bfb9-4eda-b2a4-cd839c122de6': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    '71bd09db-e104-462d-887a-74389438bb49': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    'd3cc3c6e-10de-4b27-8712-8017da428e41': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False}
}


def get_preprocessing_algorithm(alg_id=None, join=False):
    """Return preprocessing options, optionally with ``'join'`` switched on.

    Args:
        alg_id: ``None`` to get every algorithm, otherwise a string that is
            contained in the wanted algorithm's UUID key (the first key that
            contains it wins).
        join: when true, the returned option dict(s) have ``'join': True``.

    Returns:
        With ``alg_id=None``, a dict mapping each UUID to its option dict.
        Otherwise the option dict of the first matching algorithm, or ``None``
        when no key contains ``alg_id``.

    The returned dicts are always copies.  The original wrote
    ``'join': True`` straight into the module-level option dicts when called
    with ``join=True`` and no ``alg_id``, and its "copy" of the whole table
    still shared the inner dicts, so callers could change the table for
    everyone else.

    NOTE(review): the source listing is truncated right after
    ``a['join'] = True`` in the single-algorithm branch.  Returning the
    modified copy there, and ``None`` for an unknown ``alg_id``, is the
    evident intent but should be confirmed against the original file.
    """
    def _variant(options):
        # Independent copy of one option set, with 'join' forced on if asked.
        variant = dict(options)
        if join:
            variant['join'] = True
        return variant

    if alg_id is None:
        return {key: _variant(options)
                for key, options in PREPROCESSING_ALGORITHMS.items()}

    for key, options in PREPROCESSING_ALGORITHMS.items():
        if alg_id in key:
            return _variant(options)

    return None
# ... (the rest of preprocessing_algorithms.py is truncated in the source listing)
# Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios.
LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!
