vtfuncs.py
Source: vtfuncs.py
#!/bin/env python
# -*- coding: utf-8 -*-
"""
vtfuncs.py
Author: Francis Windram
Created: 27/03/19
Docstr: This file contains many generic VecTraits functions with which to perform validation and other jobs
"""
import logging
from cStringIO import StringIO
from gluon.validators import *
from gluon import current
import datetime
from distutils.util import strtobool
import hashlib
from random import randint
from os.path import basename
import csv

# make it so this is only turned on when web2py is not the context
moduledebug = False
if moduledebug:
    import logzero
    logger = logzero.setup_logger(logfile="/tmp/vtfuncsdebug.log",
                                  formatter=logging.Formatter(
                                      '%(asctime)s - %(levelname)-7s - %(funcName)s - %(message)s')
                                  )
else:
    logger = logging.getLogger("web2py.app.vbdp")


def asciilogo():
    out = """

                                                    /.
                                                 #@%/% ,%%*#
                                               /@/  ,@@&.  %
                                     *%&@@&(  %%    #,&/   %
               .&@@/  .#,#/      ,&@@@@@@@@@@@/   (/  ,&.  &.  .*%@&(/(&@&%%%(*.*/
                &%.(@#/*   ,%* .&@@@@@@@@@@@@@&&@@&&%#(@@%*&%@@&/  .*#%%&@,      #
                %(   %@*     *@@@@@@@@@@@@@@%.         (@@@%(((#%&@@/*.,@#     ./
             ,# #(   .@@/    .@@@@@@@@@@@@@@@@@@@@@@@*&@@@,             .*%@@&/.  %*
              .&%%   ,. ,@(  .@@@@@@@@@@                %@@.                 (&. .(%@(.
               ,@@,  #    /@(.@@@@@@@@@@ VECTORBYTE.ORG @@@@@&(,              .&/     .(
         ,#/.   #@@/,%      %@#/#@@@@@@@                @@@@@@@@@@@&(,          #&        %(
            *%%.(@*,@@      /@@@&@@@@@@@@@@@(&@@@@@@@@@@@@@@@@@@@@@@@@@&(/,,,,,%@#*,*/(((/.
               ,@@&@%.,/* .@@@@@@@@*.@@@&*.       *&@@@@@@@@@@@@@@@@@@@@@@#.      #@&,&,
                (@,#. ,/#&@@@@@@@@@%  (@*           ,&@@@@@@@@@@@@@@@@@@@@@@#      .&@@%
                (@//       #@@@@@@@@&                  (@@@@@@@@@@@@@@@@@@@@@@,      #@@/
                %@&.        *@@@@@&   #,                  (@@@@@@@@@@@@@@@@@@@@@(     ,@@*
               .&@/           *&@@/   (,                   @(./%@@@@@@@@@@@@@@@@@@/    ,@@
               ,@&.       .,,, (@/   #.                    &(      *(%&@@@@@@@@@@@@@@/  ,@/
               *@(            ,,#  //                      %&               ..,***,,     .@%.
               %&              /@%.                         %@#                            ,%@@#,
              ,@*              %%                            ,&@&*                            ./&@&/.
           .(@@/              .&,                               /&@#,                             ,#@@&(.
      .(&@@%*.                *&.                                  /&@@#,                             ./&@,
*#@@*                       #(                                        ./&@@&%#(,                          .*#&@@&%#(/
                              &,                                               ...                                  ...
                              &
                             .(
                             *,
                             #
                            .%
                            *#
                            #*
                            #.
                           ,#
                           (,
                           #.
                          .(
                          */
                          #,
                     ,&@@@@@,
                   ,@@@@@@@@@%
                  ,@@@@@@@@@@@,
                  *@@@@@@@@@@@/
                  .&@@@@@@@@@@.
                   .&@@@@@@@@#
                      *#%#*.
                      """
    return out


def list_to_dict(h, l):
    """
    Convert a pair of lists (headers and values) to a dictionary
        >>> list_to_dict(["test1", "test2"], [1, 2])
        {'test1': 1, 'test2': 2}
        >>> list_to_dict(["test1", "test2"], ["NA", "nan"])
        {'test1': '', 'test2': ''}
    """
    try:
        # Normalise the common "missing value" spellings to empty strings first
        l = ["" if (x in {"", "NA", "na", "NaN", "nan"}) else x for x in l]
        return dict(zip(h, l))
    except TypeError:
        raise TypeError("Both headers and values must be in list format")


def data_to_dicts(h, d):
    """
    Convert a list of lists to a list of dicts given a header list
        >>> data_to_dicts(["test1", "test2"], [[1,2], [3,4]])
        [{'test1': 1, 'test2': 2}, {'test1': 3, 'test2': 4}]
        >>> data_to_dicts(["test1", "test2"], [[1,2], [3]])
        [{'test1': 1, 'test2': 2}, {'test1': 3}]
    """
    return [list_to_dict(h, x) for x in d]
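

# A minimal usage sketch (added for illustration, not part of the original
# module): the `csv` import above is the natural way to feed these helpers.
# `csvpath` is a hypothetical caller-supplied path; the first row is assumed
# to be the header.
def _example_csv_to_dicts(csvpath):
    with open(csvpath, "rb") as f:  # "rb": the Python 2 csv module reads bytes
        rows = list(csv.reader(f))
    return data_to_dicts(rows[0], rows[1:])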


def placeholder(x):
    def ph2(y):
        return [y, None]
    return ph2


class IS_BOOL():
    """
    Determines that the argument is (or can be represented as) a bool.
    True values are y, yes, t, true, on and 1.
    False values are n, no, f, false, off and 0.
    (Values are case-insensitive)
    Example:
        Used as::
            INPUT(_type='text', _name='name', requires=IS_BOOL())
            >>> IS_BOOL()(True)
            (True, None)
            >>> IS_BOOL()(False)
            (False, None)
            >>> IS_BOOL()("True")
            (True, None)
            >>> IS_BOOL()("False")
            (False, None)
            >>> IS_BOOL()("Yes")
            (True, None)
            >>> IS_BOOL()(100)
            (100, 'enter a boolean')
    """
    def __init__(
        self,
        error_message=None,
    ):
        if not error_message:
            self.error_message = "enter a boolean"
        else:
            self.error_message = error_message

    def __call__(self, value):
        # If the value converts cleanly to a bool, return it with no error
        try:
            v = bool(strtobool(str(value).lower()))
            return (v, None)
        except ValueError:
            pass
        return (value, self.error_message)
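

# Context sketch (added for illustration, not original code): like web2py's
# built-in validators, IS_BOOL follows the (value, error) calling convention,
# which is why the row validator below only inspects index [1] of the result.
def _example_is_bool(raw):
    value, error = IS_BOOL()(raw)
    if error is None:
        return value  # the parsed bool, e.g. _example_is_bool("yes") -> True
    raise ValueError(error)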


def validate_vectraits_rowdict(rowdict, buf):
    """
    Validate a single row-dict against the per-column validator lists,
    writing any failure messages to the report buffer.
    :param rowdict: dict mapping column names to raw values for one row
    :param buf:     file-like report buffer to write failure messages to
    :return:        (set of failed column names, number of failures)
    >>> validate_vectraits_rowdict({'originaltraitname': 'test'}, StringIO())
    (set([]), 0)
    >>> validate_vectraits_rowdict({'fail': 1}, StringIO())
    (set(['fail']), 1)
    >>> validate_vectraits_rowdict({'fail': 1, 'originaltraitname': 'test'}, StringIO())
    (set(['fail']), 1)
    >>> validate_vectraits_rowdict({'fail': 1, 'fail2': 2}, StringIO())
    (set(['fail', 'fail2']), 2)
    >>> validate_vectraits_rowdict({'originaltraitunit': "x"*256}, StringIO())
    (set(['originaltraitunit']), 1)
    >>> validate_vectraits_rowdict({'originaltraitvalue': "x"}, StringIO())
    (set(['originaltraitvalue']), 1)
    >>> validate_vectraits_rowdict({'originaltraitvalue': None}, StringIO())
    (set(['originaltraitvalue']), 2)
    >>> validate_vectraits_rowdict({'replicates': -1}, StringIO())
    (set(['replicates']), 1)
    >>> validate_vectraits_rowdict({'published': "1"}, StringIO())
    (set([]), 0)
    >>> validate_vectraits_rowdict({'published': "try again"}, StringIO())
    (set(['published']), 1)
    >>> validate_vectraits_rowdict({'contributoremail': ""}, StringIO())
    (set(['contributoremail']), 2)
    >>> validate_vectraits_rowdict({'contributoremail': "mrfrancis"}, StringIO())
    (set(['contributoremail']), 1)
    >>> validate_vectraits_rowdict({'locationdate': "04/08/1992"}, StringIO())
    (set([]), 0)
    >>> validate_vectraits_rowdict({'locationdate': "4 August 92"}, StringIO())
    (set(['locationdate']), 1)
    """
    validator_dict = {
        'originalid': [IS_NOT_EMPTY(), IS_LENGTH(20)],
        'originaltraitname': [IS_LENGTH(255)],
        'originaltraitdef': [],
        'standardisedtraitname': [IS_LENGTH(255)],
        'standardisedtraitdef': [],
        'originaltraitvalue': [IS_NOT_EMPTY(), IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'originaltraitunit': [IS_NOT_EMPTY(), IS_LENGTH(255)],
        'originalerrorpos': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'originalerrorneg': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'originalerrorunit': [IS_LENGTH(255)],
        'standardisedtraitvalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'standardisedtraitunit': [IS_LENGTH(255)],
        'standardisederrorpos': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'standardisederrorneg': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'standardisederrorunit': [IS_LENGTH(255)],
        'replicates': [IS_INT_IN_RANGE(1, 2 ** 31)],
        'habitat': [IS_LENGTH(20)],
        'labfield': [IS_LENGTH(11)],
        'arenavalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'arenaunit': [IS_LENGTH(255)],
        'arenavaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'arenaunitsi': [IS_LENGTH(255)],
        'ambienttemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'ambienttempmethod': [IS_LENGTH(255)],
        'ambienttempunit': [IS_LENGTH(255)],
        'ambientlight': [IS_LENGTH(255)],
        'ambientlightunit': [IS_LENGTH(255)],
        'secondstressor': [IS_LENGTH(255)],
        'secondstressordef': [IS_LENGTH(255)],
        'secondstressorvalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'secondstressorunit': [IS_LENGTH(255)],
        'timestart': [IS_LENGTH(255)],
        'timeend': [IS_LENGTH(255)],
        'totalobstimevalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'totalobstimeunit': [IS_LENGTH(255)],
        'totalobstimevaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'totalobstimeunitsi': [IS_LENGTH(255)],
        'totalobstimenotes': [IS_LENGTH(255)],
        'resrepvalue': [IS_INT_IN_RANGE(-2 ** 31, 2 ** 31)],
        'resrepunit': [IS_LENGTH(255)],
        'resrepvaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'resrepunitsi': [IS_LENGTH(255)],
        'location': [IS_NOT_EMPTY()],
        'locationtype': [IS_LENGTH(255)],
        'originallocationdate': [IS_LENGTH(255)],
        'locationdate': [IS_DATE(format='%d/%m/%Y', error_message='must be DD/MM/YYYY!')],
        'locationdateprecision': [IS_NOT_EMPTY(), IS_INT_IN_RANGE(0, 6)],
        'coordinatetype': [IS_LENGTH(255)],
        'latitude': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'longitude': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1': [IS_LENGTH(255)],  # TODO: custom validator; at least one of interactor1/interactor1common filled
        'interactor1common': [IS_LENGTH(255)],  # TODO: custom validator; at least one of interactor1/interactor1common filled
        'interactor1wholepart': [IS_LENGTH(255)],
        'interactor1wholeparttype': [IS_LENGTH(255)],
        'interactor1number': [IS_LENGTH(255)],
        'interactor1kingdom': [IS_LENGTH(50)],
        'interactor1phylum': [IS_LENGTH(50)],
        'interactor1class': [IS_LENGTH(50)],
        'interactor1order': [IS_LENGTH(50)],
        'interactor1family': [IS_LENGTH(50)],
        'interactor1genus': [IS_LENGTH(50)],
        'interactor1species': [IS_LENGTH(255)],
        'interactor1stage': [IS_LENGTH(255)],
        'interactor1temp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1tempunit': [IS_LENGTH(255)],
        'interactor1tempmethod': [IS_LENGTH(255)],
        'interactor1growthtemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1growthtempunit': [IS_LENGTH(255)],
        'interactor1growthdur': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1growthdurunit': [IS_LENGTH(255)],
        'interactor1growthtype': [IS_LENGTH(255)],
        'interactor1acc': [IS_LENGTH(255)],
        'interactor1acctemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1acctempnotes': [IS_LENGTH(255)],
        'interactor1acctime': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1acctimenotes': [IS_LENGTH(255)],
        'interactor1acctimeunit': [IS_LENGTH(255)],
        'interactor1origtemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1origtempnotes': [IS_LENGTH(255)],
        'interactor1origtime': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1origtimenotes': [IS_LENGTH(255)],
        'interactor1origtimeunit': [IS_LENGTH(255)],
        'interactor1equilibtimevalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1equilibtimeunit': [IS_LENGTH(255)],
        'interactor1size': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1sizeunit': [IS_LENGTH(255)],
        'interactor1sizetype': [IS_LENGTH(255)],
        'interactor1sizesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1sizeunitsi': [IS_LENGTH(255)],
        'interactor1denvalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1denunit': [IS_LENGTH(255)],
        'interactor1dentypesi': [IS_LENGTH(255)],
        'interactor1denvaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1denunitsi': [IS_LENGTH(255)],
        'interactor1massvaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor1massunitsi': [IS_LENGTH(255)],
        'interactor2': [IS_LENGTH(255)],
        'interactor2common': [IS_LENGTH(255)],
        'interactor2kingdom': [IS_LENGTH(50)],
        'interactor2phylum': [IS_LENGTH(50)],
        'interactor2class': [IS_LENGTH(50)],
        'interactor2order': [IS_LENGTH(50)],
        'interactor2family': [IS_LENGTH(50)],
        'interactor2genus': [IS_LENGTH(50)],
        'interactor2species': [IS_LENGTH(255)],
        'interactor2stage': [IS_LENGTH(255)],
        'interactor2temp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2tempunit': [IS_LENGTH(255)],
        'interactor2tempmethod': [IS_LENGTH(255)],
        'interactor2growthtemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2growthtempunit': [IS_LENGTH(255)],
        'interactor2growthdur': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2growthdurunit': [IS_LENGTH(255)],
        'interactor2growthtype': [IS_LENGTH(255)],
        'interactor2acc': [IS_LENGTH(255)],
        'interactor2acctemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2acctempnotes': [IS_LENGTH(255)],
        'interactor2acctime': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2acctimenotes': [IS_LENGTH(255)],
        'interactor2acctimeunit': [IS_LENGTH(255)],
        'interactor2origtemp': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2origtempnotes': [IS_LENGTH(255)],
        'interactor2origtime': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2origtimenotes': [IS_LENGTH(255)],
        'interactor2origtimeunit': [IS_LENGTH(255)],
        'interactor2equilibtimevalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2equilibtimeunit': [IS_LENGTH(255)],
        'interactor2size': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2sizeunit': [IS_LENGTH(255)],
        'interactor2sizetype': [IS_LENGTH(255)],
        'interactor2sizesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2sizeunitsi': [IS_LENGTH(255)],
        'interactor2denvalue': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2denunit': [IS_LENGTH(255)],
        'interactor2dentypesi': [IS_LENGTH(255)],
        'interactor2denvaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2denunitsi': [IS_LENGTH(255)],
        'interactor2massvaluesi': [IS_FLOAT_IN_RANGE(-1e100, 1e100)],
        'interactor2massunitsi': [IS_LENGTH(255)],
        'physicalprocess': [IS_LENGTH(255)],
        'physicalprocess_1': [IS_LENGTH(255)],
        'physicalprocess_2': [IS_LENGTH(255)],
        'citation': [IS_NOT_EMPTY(), IS_LENGTH(1500)],
        'doi': [IS_LENGTH(255)],
        'published': [IS_NOT_EMPTY(), IS_BOOL()],
        'embargorelease': [IS_NOT_EMPTY(), IS_DATE(format='%d/%m/%Y', error_message='must be DD/MM/YYYY!')],
        'figuretable': [IS_LENGTH(255)],
        'notes': [IS_LENGTH(2055)],
        'submittedby': [IS_NOT_EMPTY(), IS_LENGTH(255)],
        'contributoremail': [IS_NOT_EMPTY(), IS_EMAIL(), IS_LENGTH(255)],
    }
    notnull_cols = {
        "originalid",
        "originaltraitvalue",
        "originaltraitunit",
        "location",
        "locationdateprecision",
        "citation",
        "published",
        "embargorelease",
        "submittedby",
        "contributoremail",
    }
    notnull_trackerset = set()
    failcounter = 0
    failed_columns = set()
    for column in rowdict.keys():
        if column not in notnull_cols and rowdict[column] in {"", "NA", "na", "NaN", "nan"}:
            # Don't fail empty strings if they are not required columns
            continue
        if column in notnull_cols:
            notnull_trackerset.add(column)
        try:
            for v in validator_dict[column]:  # Get validator from validator list
                if v(rowdict[column])[1]:  # If it fails...
                    failed_columns.add(column)  # Append to failed column set
                    failcounter += 1
                    # Write to log and report
                    logger.info('Column "{}" failed validator "{}", value: "{}"'.format(column, v.__class__.__name__,
                                                                                        rowdict[column]))
                    buf.write('    Column "{}" failed validator "{}", value: "{}"\n'.format(column, v.__class__.__name__,
                                                                                            rowdict[column]))
                else:
                    logger.debug('Column "{}" passed validator "{}"'.format(column, v.__class__.__name__))
        except KeyError:
            # Column name not present in validator_dict at all: count it as failed
            failed_columns.add(column)
            failcounter += 1
            # Write to log and report
            logger.info('Invalid column name: "{}"'.format(column))
            buf.write('    Invalid column name: "{}"\n'.format(column))
    # Check that every required (not-null) column was actually seen in this row
    missing_cols = notnull_cols.difference(notnull_trackerset)
    if missing_cols:
        failed_columns = failed_columns | missing_cols
        failcounter += len(missing_cols)
        logger.info('Missing essential columns: {}'.format(", ".join(missing_cols)))
        buf.write('    Missing essential columns: {}\n'.format(", ".join(missing_cols)))
    return failed_columns, failcounter


def validate_vectraits(data, filename='test.csv'):
    """
    Validate a list of vectraits data and determine whether the form is correct for upload.
    :param data:     list of rows, the first row being the header
    :param filename: name used in the log and report headers
    :return: A dict of all columns which contain errors and on which line/s they occur
    >>> validate_vectraits([["test1", "originaltraitname"], [1,2], [3,4]])
    """
    # Print setup to log
    start = datetime.datetime.now()
    logger.info('{}'.format('-' * (38 + len(filename))))
    logger.info('  Validation of {} started at {}'.format(filename, start.strftime('%H:%M:%S')))
    logger.debug('  File length: {} rows'.format(len(data) - 1))
    logger.info('{}'.format('-' * (38 + len(filename))))
    # Print report setup to report buffer
    report = StringIO()
    report.write('\n{}\n'.format('-' * (17 + len(filename))))
    report.write('  VALIDATION REPORT\n\n')
    report.write('    File name: {}\n'.format(filename))
    report.write('  File length: {} rows\n'.format(len(data) - 1))
    report.write('      Started: {}\n'.format(start.strftime('%d-%m-%y %H:%M:%S')))
    report.write('{}\n\n'.format('-' * (17 + len(filename))))
    # Isolate header
    header = data.pop(0)
    errcounter = 0
    errlinecounter = 0
    failed = False
    # Convert the remaining rows to dicts keyed by the header
    output = data_to_dicts(header, data)
    failed_dict = {}
    long_dataset = False
    log_triggers = {}
    if len(output) > 500:
        # For long datasets, only log progress at each decile
        long_dataset = True
        trigger_list = [int(len(output) * (x / float(10))) - 1 for x in range(0, 11)]
        trigger_list[0] = 0
        percentile_list = [x * 10 for x in range(0, 11)]
        log_triggers = dict(zip(trigger_list, percentile_list))
    # Validate rows against validators using validate_vectraits_rowdict
    for i, item in enumerate(output):
        if long_dataset:
            if i in log_triggers.keys():
                logger.info('Validating row {}/{} ({}%)...'.format(i + 1, len(output), log_triggers[i]))
        else:
            logger.info('Validating row {}...'.format(i + 1))
        report.write('Validating row {}...\n'.format(i + 1))
        failed_items, errdelta = validate_vectraits_rowdict(item, report)
        if errdelta > 0:
            errcounter += errdelta
            errlinecounter += 1
            for entry in failed_items:
                try:
                    failed_dict[entry].append(i + 1)
                except KeyError:
                    failed_dict[entry] = [i + 1]
    # Finish up in log
    logger.info('{}'.format('-' * (38 + len(filename))))
    end = datetime.datetime.now()
    logger.info('Validation finished at {}'.format(end.strftime('%H:%M:%S')))
    logger.info('         Errors: {}'.format(errcounter))
    logger.info('   Failed lines: {}'.format(errlinecounter))
    time_elapsed = end - start
    logger.info('Validation time: {}'.format(time_elapsed))
    logger.info('{}'.format('-' * (38 + len(filename))))
    # Finish up in report
    report.write('\n{}\n'.format('-' * (17 + len(filename))))
    report.write('  VALIDATION COMPLETE\n\n')
    report.write('         Ended: {}\n'.format(end.strftime('%H:%M:%S')))
    report.write('        Errors: {}\n'.format(errcounter))
    report.write('  Failed lines: {}\n'.format(errlinecounter))
    report.write('  Time elapsed: {}\n'.format(time_elapsed))
    report.write('{}\n\n'.format('-' * (17 + len(filename))))
    report.write('  COLUMN REPORT  \n\n')
    logger.info(failed_dict)  # TODO: return this dict in the correct format.
    # TODO: convert this dict to a nice report and put into report buffer.
    for x, y in failed_dict.iteritems():
        if len(y) == 1:
            report.write('"{}" failed on row: {}\n'.format(x, y))
        else:
            report.write('"{}" failed on rows: {}\n'.format(x, y))
    # Roughly 1 report in 20 gets the ASCII logo appended as an easter egg
    if not randint(0, 19):
        report.write("\n\n\n")
        report.write(asciilogo())
    report_str = report.getvalue()
    if not long_dataset:
        logger.info(report_str)
    try:
        report.close()  # TODO: Just for now
    except ValueError:
        pass
    if errcounter:
        failed = True
    return report_str, failed


class DataIntegrityError(Exception):
    pass


class VTUploadError(Exception):
    pass
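

# End-to-end sketch (added for illustration, not original code): how the two
# halves of this module are typically combined. The path and the database
# environment are assumptions of the caller, not part of this file.
def _example_validate_then_upload(csvpath):
    with open(csvpath, "rb") as f:
        data = list(csv.reader(f))
    report, failed = validate_vectraits(data, filename=basename(csvpath))
    if failed:
        raise DataIntegrityError(report)
    return upload_vectraits_dataset(csvpath)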


def upload_vectraits_dataset(csvpath, templatepath="../static/templates/vectraits_template.csv", logger=False):
    """
    Upload a validated VecTraits csv into the database, normalising shared
    entities (locations, taxonomy, contributors, citations, etc.) into their
    own tables along the way.
    :param csvpath:      path of the csv file to upload
    :param templatepath: path of the template csv defining the expected columns
    :param logger:       optional logger; if False, a file logger is created
    :return:
    >>> upload_vectraits_dataset("../uploads/tests/missingcols.csv", "path/to/nothing.csv")
    False
    >>> upload_vectraits_dataset("../uploads/tests/passing.csv")
    True
    >>> upload_vectraits_dataset("../uploads/tests/passing_long.csv")
    True
    >>> upload_vectraits_dataset("../uploads/tests/missingcols.csv")
    True
    >>> upload_vectraits_dataset("../uploads/tests/missingcols_long.csv")
    True
    """
    def md5sum(filename, blocksize=65536):
        """
        Generate md5 hash of a file in a memory-efficient manner
        :param filename:    Path of file to be hashed
        :param blocksize:   Size of block to read in each loop (default 64kB)
        :return:            Hex representation of file hash (32B str)
        """
        md5hash = hashlib.md5()
        with open(filename, "rb") as f:
            for block in iter(lambda: f.read(blocksize), b""):
                md5hash.update(block)
        return md5hash.hexdigest()

    def noneadapter(x):
        """
        Return None rather than NaN to allow correct selection of null values in DAL.
        Seems hacky, but hopefully should be fast enough.
        """
        # NB: `pd` is imported further down in this function; noneadapter is
        # only ever called after that import has run, so this resolves fine.
        if pd.isna(x):
            return None
        return x

    import logzero
    if not logger:
        logger = logzero.setup_logger(logfile="logs/vtuploads.log",
                                      formatter=logging.Formatter(
                                          '%(asctime)s - %(levelname)-7s - %(funcName)s - %(message)s'),
                                      disableStderrLogger=True)
        logger.info("Turned on logger in upload_vectraits_dataset")
    # Load db connection
    db2 = current.db2
    # Simple check to make sure dataset hasn't been uploaded before.
    logger.info("Checking whether file is unique...")
    filemd5 = md5sum(csvpath)
    logger.debug("File hash = {}".format(filemd5))
    search = db2(
        (db2.dataset_hash.filehash == filemd5)
    ).select()
    if len(search) > 0:
        logger.error("File hash already in database!")
        raise VTUploadError("File hash {} for {} already in database".format(filemd5, csvpath))
    else:
        logger.info("New file! Processing...")
    logger.info("Starting data upload from {}".format(basename(csvpath)))
    import pandas as pd
    import numpy as np
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                                DATA PREP                                │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Load csv
    logger.info("Opening data file...")
    try:
        datafile = pd.read_csv(csvpath)
    except IOError:
        logger.exception("Error opening data file {}".format(csvpath))
        raise VTUploadError("Error opening data file {}".format(csvpath))
    try:
        template = pd.read_csv(templatepath)
    except IOError:
        logger.exception("Error opening template file {}".format(templatepath))
        raise VTUploadError("Error opening template file {}".format(templatepath))
    # Check for missing columns, and if missing, insert with blank values
    logger.debug("datafile keys:{}".format(len(datafile.keys())))
    logger.debug("template keys:{}".format(len(template.keys())))
    difflist = list(set(template.keys()).difference(set(datafile.keys())))
    if difflist:
        logger.info("Creating missing columns: {}".format(difflist))
        missing_cols = pd.DataFrame(np.nan, index=range(0, len(datafile.index)), columns=difflist)
        logger.debug(missing_cols.dtypes)
        datafile = pd.concat([datafile, missing_cols], axis=1)

    # Create id fields in main table
    def idcat(*args):
        return " ".join([str(x) for x in args])

    datafile["locationidstr"] = np.vectorize(idcat)(datafile.location, datafile.locationtype,
                                                    datafile.originallocationdate, datafile.locationdate,
                                                    datafile.locationdateprecision, datafile.latitude,
                                                    datafile.longitude)
    datafile["traitdescripidstr"] = np.vectorize(idcat)(datafile.physicalprocess, datafile.physicalprocess_1,
                                                        datafile.physicalprocess_2)
    datafile["expcondidstr"] = np.vectorize(idcat)(datafile.replicates, datafile.habitat, datafile.labfield,
                                                   datafile.arenavalue, datafile.arenaunit, datafile.arenavaluesi,
                                                   datafile.arenaunitsi, datafile.resrepvalue, datafile.resrepunit,
                                                   datafile.resrepvaluesi, datafile.resrepunitsi)
    datafile["interactor1idstr"] = np.vectorize(idcat)(datafile.interactor1kingdom,
                                                       datafile.interactor1phylum,
                                                       datafile.interactor1class,
                                                       datafile.interactor1order,
                                                       datafile.interactor1family,
                                                       datafile.interactor1genus,
                                                       datafile.interactor1species)
    datafile["interactor2idstr"] = np.vectorize(idcat)(datafile.interactor2kingdom,
                                                       datafile.interactor2phylum,
                                                       datafile.interactor2class,
                                                       datafile.interactor2order,
                                                       datafile.interactor2family,
                                                       datafile.interactor2genus,
                                                       datafile.interactor2species)
    datafile["sourceinfoidstr"] = np.vectorize(idcat)(datafile.originalid,
                                                      datafile.figuretable,
                                                      datafile.submittedby,
                                                      datafile.contributoremail,
                                                      datafile.citation,
                                                      datafile.doi,
                                                      datafile.published,
                                                      datafile.embargorelease)
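    # Note (added for illustration): each *idstr column above is a plain
    # whitespace-joined natural key. It exists only so duplicate rows collapse
    # together and the generated database ids can later be left-joined back
    # onto the main dataframe, e.g. idcat("Silwood", None, 1.5) would yield
    # "Silwood None 1.5" (the location name here is hypothetical).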
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                                LOCATIONS                                │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract unique locations
    logger.info("Extracting unique locations...")
    locationnames = ["location", "locationtype", "originallocationdate", "locationdate", "locationdateprecision",
                     "latitude", "longitude", "locationidstr"]
    locations = datafile[locationnames]
    locations_uni = locations.drop_duplicates().reset_index(drop=True)
    locations_uni["locationid"] = np.int(-1)
    # locations = locations.fillna(None)    # Cannot replace with None as None is a different type to float64...
    # logger.debug(locations_uni)
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in locations_uni.iterrows():
        locationid = np.nan
        search = db2(
            (db2.studylocation.locationtext == noneadapter(entry.location)) &
            (db2.studylocation.locationtype == noneadapter(entry.locationtype)) &
            (db2.studylocation.originallocationdate == noneadapter(entry.originallocationdate)) &
            (db2.studylocation.locationdate == noneadapter(entry.locationdate)) &
            (db2.studylocation.locationdateprecision == noneadapter(entry.locationdateprecision)) &
            (db2.studylocation.latitude == noneadapter(entry.latitude)) &
            (db2.studylocation.longitude == noneadapter(entry.longitude))
        ).select(db2.studylocation.locationid)
        # logger.debug(len(search))
        if len(search) < 1:
            # insert into db
            inscount += 1
            locationid = db2.studylocation.insert(locationtext=noneadapter(entry.location),
                                                  locationtype=noneadapter(entry.locationtype),
                                                  originallocationdate=noneadapter(entry.originallocationdate),
                                                  locationdate=noneadapter(entry.locationdate),
                                                  locationdateprecision=noneadapter(entry.locationdateprecision),
                                                  latitude=noneadapter(entry.latitude),
                                                  longitude=noneadapter(entry.longitude))
            logger.debug("Inserted location id {}".format(locationid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            locationid = search[0].locationid
        else:
            errcount += 1
            logger.warning("Ambiguous location entry provided:\n\n{}\n\n{}".format(entry, search))
            locationid = search[0].locationid
        locations_uni.ix[index, "locationid"] = locationid
    logger.debug("--- TABLE: studylocation ---")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("----------------------------")
    # left join locations_uni to main df on locationidstr
    locations_uni = locations_uni[["locationidstr", "locationid"]]
    datafile = pd.merge(datafile, locations_uni, on="locationidstr", how="left")
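    # Pattern note (added for illustration, not original code): the loop above
    # is a get-or-create normalisation that repeats below for taxonomy, trait
    # descriptions, contributors, citations, source info and experimental
    # conditions. Schematically, for a hypothetical table `t` with
    # candidate-key fields f1, f2:
    #     rows = db2((t.f1 == v1) & (t.f2 == v2)).select(t.id)
    #     rowid = rows[0].id if rows else t.insert(f1=v1, f2=v2)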
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                                 TAXONOMY                                │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract unique Taxonomy entries
    logger.info("Extracting unique taxonomic groups...")
    interactor1_names = ["interactor1kingdom", "interactor1phylum", "interactor1class", "interactor1order",
                         "interactor1family", "interactor1genus", "interactor1species", "interactor1idstr"]
    interactor2_names = ["interactor2kingdom", "interactor2phylum", "interactor2class", "interactor2order",
                         "interactor2family", "interactor2genus", "interactor2species", "interactor2idstr"]
    interactor1_tax = datafile[interactor1_names]
    interactor2_tax = datafile[interactor2_names]
    # Rename columns in both dfs and merge together
    interactor1_tax.columns = ["taxkingdom", "taxphylum", "taxclass", "taxorder",
                               "taxfamily", "taxgenus", "taxspecies", "interactoridstr"]
    interactor2_tax.columns = ["taxkingdom", "taxphylum", "taxclass", "taxorder",
                               "taxfamily", "taxgenus", "taxspecies", "interactoridstr"]
    interactor_tax = pd.concat([interactor1_tax, interactor2_tax])
    interactor_tax_uni = interactor_tax.drop_duplicates().reset_index(drop=True)
    interactor_tax_uni["interactorxid"] = np.int(-1)
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in interactor_tax_uni.iterrows():
        interactorxid = np.nan
        search = db2(
            (db2.taxonomy.taxkingdom == noneadapter(entry.taxkingdom)) &
            (db2.taxonomy.taxphylum == noneadapter(entry.taxphylum)) &
            (db2.taxonomy.taxclass == noneadapter(entry.taxclass)) &
            (db2.taxonomy.taxorder == noneadapter(entry.taxorder)) &
            (db2.taxonomy.taxfamily == noneadapter(entry.taxfamily)) &
            (db2.taxonomy.taxgenus == noneadapter(entry.taxgenus)) &
            (db2.taxonomy.taxspecies == noneadapter(entry.taxspecies))
        ).select(db2.taxonomy.taxid)
        if len(search) < 1:
            # insert into db
            inscount += 1
            interactorxid = db2.taxonomy.insert(taxkingdom=noneadapter(entry.taxkingdom),
                                                taxphylum=noneadapter(entry.taxphylum),
                                                taxclass=noneadapter(entry.taxclass),
                                                taxorder=noneadapter(entry.taxorder),
                                                taxfamily=noneadapter(entry.taxfamily),
                                                taxgenus=noneadapter(entry.taxgenus),
                                                taxspecies=noneadapter(entry.taxspecies))
            logger.debug("Inserted taxonomy id {}".format(interactorxid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            interactorxid = search[0].taxid
        else:
            errcount += 1
            logger.warning("Ambiguous taxonomy entry provided:\n\n{}\n\n{}".format(entry, search))
            interactorxid = search[0].taxid
        interactor_tax_uni.ix[index, "interactorxid"] = interactorxid
    logger.debug("----- TABLE: taxonomy ------")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("----------------------------")
    # left join interactor_tax_uni to main df on interactoridstr
    interactor_tax_uni = interactor_tax_uni[["interactoridstr", "interactorxid"]]
    # We need to use a slightly different method here as this foreignid gets left-joined twice, once for each spp
    datafile = pd.merge(datafile, interactor_tax_uni, left_on="interactor1idstr", right_on="interactoridstr",
                        how="left")
    datafile = datafile.rename(columns={"interactorxid": "interactor1id"})
    datafile = pd.merge(datafile, interactor_tax_uni, left_on="interactor2idstr", right_on="interactoridstr",
                        how="left")
    datafile = datafile.rename(columns={"interactorxid": "interactor2id"})
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                                TRAITDESC                                │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract unique trait descriptions
    logger.info("Extracting unique trait descriptions...")
    traitdescrip_names = ["physicalprocess", "physicalprocess_1", "physicalprocess_2", "traitdescripidstr"]
    traitdescrips = datafile[traitdescrip_names]
    traitdescrips_uni = traitdescrips.drop_duplicates().reset_index(drop=True)
    traitdescrips_uni["traitdescriptionid"] = np.int(-1)
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in traitdescrips_uni.iterrows():
        traitdescriptionid = np.nan
        search = db2(
            (db2.traitdescription.physicalprocess == noneadapter(entry.physicalprocess)) &
            (db2.traitdescription.physicalprocess_1 == noneadapter(entry.physicalprocess_1)) &
            (db2.traitdescription.physicalprocess_2 == noneadapter(entry.physicalprocess_2))
        ).select(db2.traitdescription.traitdesid)
        if len(search) < 1:
            # insert into db
            inscount += 1
            traitdescriptionid = db2.traitdescription.insert(physicalprocess=noneadapter(entry.physicalprocess),
                                                             physicalprocess_1=noneadapter(entry.physicalprocess_1),
                                                             physicalprocess_2=noneadapter(entry.physicalprocess_2))
            logger.debug("Inserted traitdescrip id {}".format(traitdescriptionid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            traitdescriptionid = search[0].traitdesid
        else:
            errcount += 1
            logger.warning("Ambiguous trait description entry provided:\n\n{}\n\n{}".format(entry, search))
            traitdescriptionid = search[0].traitdesid
        traitdescrips_uni.ix[index, "traitdescriptionid"] = traitdescriptionid
    logger.debug("--- TABLE: traitdescription ---")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("-------------------------------")
    # left join traitdescrips_uni to main df on traitdescripidstr
    traitdescrips_uni = traitdescrips_uni[["traitdescripidstr", "traitdescriptionid"]]
    datafile = pd.merge(datafile, traitdescrips_uni, on="traitdescripidstr", how="left")
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                            SOURCE INFO SETUP                            │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract source info (incl. contrib & citation ids)
    logger.info("Extracting source information...")
    source_names = ["originalid", "figuretable", "submittedby", "contributoremail",
                    "citation", "doi", "published", "embargorelease", "sourceinfoidstr"]
    source = datafile[source_names]
    source_uni = source.drop_duplicates().reset_index(drop=True)
    source_uni["sourceinfoid"] = np.int(-1)
    source_uni["contributoridstr"] = np.vectorize(idcat)(source_uni.submittedby, source_uni.contributoremail)
    source_uni["citationidstr"] = np.vectorize(idcat)(source_uni.citation, source_uni.doi,
                                                      source_uni.published, source_uni.embargorelease)
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                               CONTRIBUTORS                              │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract contributor
    logger.info("Extracting contributors...")
    contrib_names = ["submittedby", "contributoremail", "contributoridstr"]
    contrib = source_uni[contrib_names]
    contrib_uni = contrib.drop_duplicates().reset_index(drop=True)
    contrib_uni["contributorid"] = np.int(-1)
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in contrib_uni.iterrows():
        contributorid = np.nan
        search = db2(
            (db2.contributor.submittedby == noneadapter(entry.submittedby)) &
            (db2.contributor.contributoremail == noneadapter(entry.contributoremail))
        ).select(db2.contributor.contributorid)
        if len(search) < 1:
            # insert into db
            inscount += 1
            contributorid = db2.contributor.insert(submittedby=noneadapter(entry.submittedby),
                                                   contributoremail=noneadapter(entry.contributoremail))
            logger.debug("Inserted contributor id {}".format(contributorid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            contributorid = search[0].contributorid
        else:
            errcount += 1
            logger.warning("Ambiguous contributor entry provided:\n\n{}\n\n{}".format(entry, search))
            contributorid = search[0].contributorid
        contrib_uni.ix[index, "contributorid"] = contributorid
    logger.debug("---- TABLE: contributor ----")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("----------------------------")
    # left join contrib_uni to source df on contributoridstr
    contrib_uni = contrib_uni[["contributoridstr", "contributorid"]]
    source_uni = pd.merge(source_uni, contrib_uni, on="contributoridstr", how="left")
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                                CITATIONS                                │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract citation
    logger.info("Extracting citations...")
    citation_names = ["citation", "doi", "published", "embargorelease", "citationidstr"]
    citation = source_uni[citation_names]
    citation_uni = citation.drop_duplicates().reset_index(drop=True)
    citation_uni["citationid"] = np.int(-1)
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in citation_uni.iterrows():
        citationid = np.nan
        search = db2(
            (db2.citation.citation == noneadapter(entry.citation)) &
            (db2.citation.doi == noneadapter(entry.doi)) &
            (db2.citation.published == noneadapter(entry.published)) &
            (db2.citation.embargorelease == noneadapter(entry.embargorelease))
        ).select(db2.citation.citationid)
        if len(search) < 1:
            # insert into db
            inscount += 1
            citationid = db2.citation.insert(citation=noneadapter(entry.citation),
                                             doi=noneadapter(entry.doi),
                                             published=noneadapter(entry.published),
                                             embargorelease=noneadapter(entry.embargorelease))
            logger.debug("Inserted citation id {}".format(citationid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            citationid = search[0].citationid
        else:
            errcount += 1
            logger.warning("Ambiguous citation entry provided:\n\n{}\n\n{}".format(entry, search))
            citationid = search[0].citationid
        citation_uni.ix[index, "citationid"] = citationid
    logger.debug("----- TABLE: citation ------")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("----------------------------")
    # left join citation_uni to source df on citationidstr
    citation_uni = citation_uni[["citationidstr", "citationid"]]
    source_uni = pd.merge(source_uni, citation_uni, on="citationidstr", how="left")
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                            SOURCE INFO UPLOAD                           │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in source_uni.iterrows():
        sourceinfoid = np.nan
        search = db2(
            (db2.sourceinfo.originalid == noneadapter(entry.originalid)) &
            (db2.sourceinfo.contributorid == noneadapter(entry.contributorid)) &
            (db2.sourceinfo.citationid == noneadapter(entry.citationid)) &
            (db2.sourceinfo.figuretable == noneadapter(entry.figuretable))
        ).select(db2.sourceinfo.sourceid)
        if len(search) < 1:
            # insert into db
            inscount += 1
            sourceinfoid = db2.sourceinfo.insert(originalid=noneadapter(entry.originalid),
                                                 contributorid=noneadapter(entry.contributorid),
                                                 citationid=noneadapter(entry.citationid),
                                                 figuretable=noneadapter(entry.figuretable))
            logger.debug("Inserted source info id {}".format(sourceinfoid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            sourceinfoid = search[0].sourceid
        else:
            errcount += 1
            logger.warning("Ambiguous source info entry provided:\n\n{}\n\n{}".format(entry, search))
            sourceinfoid = search[0].sourceid
        source_uni.ix[index, "sourceinfoid"] = sourceinfoid
    logger.debug("---- TABLE: sourceinfo -----")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("----------------------------")
    # left join source_uni to main df on sourceinfoidstr
    source_uni = source_uni[["sourceinfoidstr", "sourceinfoid"]]
    datafile = pd.merge(datafile, source_uni, on="sourceinfoidstr", how="left")
    # ─────────────────────────────────────────────────────────────────────────── #
    # │                                 EXP COND                                │ #
    # ─────────────────────────────────────────────────────────────────────────── #
    # Extract experimental conditions
    logger.info("Extracting experimental conditions...")
    expcond_names = ["replicates", "habitat", "labfield", "arenavalue", "arenaunit", "arenavaluesi", "arenaunitsi",
                     "resrepvalue", "resrepunit", "resrepvaluesi", "resrepunitsi", "expcondidstr"]
    expcond = datafile[expcond_names]
    expcond_uni = expcond.drop_duplicates().reset_index(drop=True)
    expcond_uni["expcondid"] = np.int(-1)
    # logger.debug(locations_uni)
    inscount = 0
    foundcoconut = 0
    errcount = 0
    for index, entry in expcond_uni.iterrows():
        expcondid = np.nan
        search = db2(
            (db2.experimentalconditions.replicates == noneadapter(entry.replicates)) &
            (db2.experimentalconditions.habitat == noneadapter(entry.habitat)) &
            (db2.experimentalconditions.labfield == noneadapter(entry.labfield)) &
            (db2.experimentalconditions.arenavalue == noneadapter(entry.arenavalue)) &
            (db2.experimentalconditions.arenaunit == noneadapter(entry.arenaunit)) &
            (db2.experimentalconditions.arenavaluesi == noneadapter(entry.arenavaluesi)) &
            (db2.experimentalconditions.arenaunitsi == noneadapter(entry.arenaunitsi)) &
            (db2.experimentalconditions.resrepvalue == noneadapter(entry.resrepvalue)) &
            (db2.experimentalconditions.resrepunit == noneadapter(entry.resrepunit)) &
            (db2.experimentalconditions.resrepvaluesi == noneadapter(entry.resrepvaluesi)) &
            (db2.experimentalconditions.resrepunitsi == noneadapter(entry.resrepunitsi))
        ).select(db2.experimentalconditions.experimentid)
        if len(search) < 1:
            # insert into db
            inscount += 1
            expcondid = db2.experimentalconditions.insert(replicates=noneadapter(entry.replicates),
                                                          habitat=noneadapter(entry.habitat),
                                                          labfield=noneadapter(entry.labfield),
                                                          arenavalue=noneadapter(entry.arenavalue),
                                                          arenaunit=noneadapter(entry.arenaunit),
                                                          arenavaluesi=noneadapter(entry.arenavaluesi),
                                                          arenaunitsi=noneadapter(entry.arenaunitsi),
                                                          resrepvalue=noneadapter(entry.resrepvalue),
                                                          resrepunit=noneadapter(entry.resrepunit),
                                                          resrepvaluesi=noneadapter(entry.resrepvaluesi),
                                                          resrepunitsi=noneadapter(entry.resrepunitsi))
            logger.debug("Inserted experimentalconditions id {}".format(expcondid))
        elif len(search) == 1:
            # Use the found id
            foundcoconut += 1
            expcondid = search[0].experimentid
        else:
            errcount += 1
            # logger.warning("Ambiguous experimentalconditions entry provided:\n\n{}\n\n{}".format(entry, search))
            expcondid = search[0].experimentid
        expcond_uni.ix[index, "expcondid"] = expcondid
    logger.debug("--- TABLE: experimentalconditions ---")
    logger.debug("Inserted: {}".format(inscount))
    logger.debug("Found: {}".format(foundcoconut))
    logger.debug("Errors: {}".format(errcount))
    logger.debug("-------------------------------------")
    # left join expcond_uni to main df on expcondidstr
    expcond_uni = expcond_uni[["expcondidstr", "expcondid"]]
    datafile = pd.merge(datafile, expcond_uni, on="expcondidstr", how="left")
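    # Illustrative aside (added, not original code): both validate_vectraits()
    # and the upload loop below precompute decile "log triggers" so progress is
    # only logged at every 10% of rows. An equivalent standalone helper:
    def _decile_triggers(n_rows):
        # e.g. _decile_triggers(1000) -> {0: 0, 99: 10, 199: 20, ..., 999: 100}
        triggers = [int(n_rows * (x / 10.0)) - 1 for x in range(0, 11)]
        triggers[0] = 0
        return dict(zip(triggers, [x * 10 for x in range(0, 11)]))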
     'interactor1sizeunitsi', 'interactor1denvalue', 'interactor1denunit', 'interactor1dentypesi',979                       'interactor1denvaluesi', 'interactor1denunitsi', 'interactor1massvaluesi',980                       'interactor1massunitsi', 'interactor2', 'interactor2common', 'interactor2stage',981                       'interactor2temp', 'interactor2tempunit', 'interactor2tempmethod', 'interactor2growthtemp',982                       'interactor2growthtempunit', 'interactor2growthdur', 'interactor2growthdurunit',983                       'interactor2growthtype', 'interactor2acc', 'interactor2acctemp', 'interactor2acctempnotes',984                       'interactor2acctime', 'interactor2acctimenotes', 'interactor2acctimeunit', 'interactor2origtemp',985                       'interactor2origtempnotes', 'interactor2origtime', 'interactor2origtimenotes',986                       'interactor2origtimeunit', 'interactor2equilibtimevalue', 'interactor2equilibtimeunit',987                       'interactor2size', 'interactor2sizeunit', 'interactor2sizetype', 'interactor2sizesi',988                       'interactor2sizeunitsi', 'interactor2denvalue', 'interactor2denunit', 'interactor2dentypesi',989                       'interactor2denvaluesi', 'interactor2denunitsi', 'interactor2massvaluesi',990                       'interactor2massunitsi', 'locationid', 'interactor1id', 'interactor2id', 'traitdescriptionid',991                       'sourceinfoid', 'expcondid', 'notes']992    maintable = datafile[maintable_names]993    # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ #994    # â                                         UPLOAD                                          â #995    # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ #996    # Upload to db997    logger.info("Uploading data to db...")998    inscount = 0999    log_triggers = {}1000    short_dataset = True1001    if len(maintable) > 100:1002        short_dataset = False1003        trigger_list = [int(len(maintable) * (x / float(10))) - 1 for x in range(0, 11)]1004        trigger_list[0] = 01005        percentile_list = [x * 10 for x in range(0, 11)]1006        log_triggers = dict(zip(trigger_list, percentile_list))1007    for index, entry in maintable.iterrows():1008        if not short_dataset:1009            if index in log_triggers.keys():1010                logger.info('Uploading row {}/{} ({}%)...'.format(index + 1, len(maintable), log_triggers[index]))1011        # insert into db1012        inscount += 11013        mainid = db2.maintable.insert(1014            originalid=noneadapter(entry.originalid),1015            originaltraitname=noneadapter(entry.originaltraitname),1016            originaltraitdef=noneadapter(entry.originaltraitdef),1017            standardisedtraitname=noneadapter(entry.standardisedtraitname),1018            standardisedtraitdef=noneadapter(entry.standardisedtraitdef),1019            originaltraitvalue=noneadapter(entry.originaltraitvalue),1020            originaltraitunit=noneadapter(entry.originaltraitunit),1021            originalerrorpos=noneadapter(entry.originalerrorpos),1022            originalerrorneg=noneadapter(entry.originalerrorneg),1023            originalerrorunit=noneadapter(entry.originalerrorunit),1024            standardisedtraitvalue=noneadapter(entry.standardisedtraitvalue),1025            standardisedtraitunit=noneadapter(entry.standardisedtraitunit),1026            
standardisederrorpos=noneadapter(entry.standardisederrorpos),1027            standardisederrorneg=noneadapter(entry.standardisederrorneg),1028            standardisederrorunit=noneadapter(entry.standardisederrorunit),1029            ambienttemp=noneadapter(entry.ambienttemp),1030            ambienttempmethod=noneadapter(entry.ambienttempmethod),1031            ambienttempunit=noneadapter(entry.ambienttempunit),1032            ambientlight=noneadapter(entry.ambientlight),1033            ambientlightunit=noneadapter(entry.ambientlightunit),1034            secondstressor=noneadapter(entry.secondstressor),1035            secondstressordef=noneadapter(entry.secondstressordef),1036            secondstressorvalue=noneadapter(entry.secondstressorvalue),1037            secondstressorunit=noneadapter(entry.secondstressorunit),1038            timestart=noneadapter(entry.timestart),1039            timeend=noneadapter(entry.timeend),1040            totalobstimevalue=noneadapter(entry.totalobstimevalue),1041            totalobstimeunit=noneadapter(entry.totalobstimeunit),1042            totalobstimevaluesi=noneadapter(entry.totalobstimevaluesi),1043            totalobstimeunitsi=noneadapter(entry.totalobstimeunitsi),1044            totalobstimenotes=noneadapter(entry.totalobstimenotes),1045            interactor1=noneadapter(entry.interactor1),1046            interactor1common=noneadapter(entry.interactor1common),1047            interactor1wholepart=noneadapter(entry.interactor1wholepart),1048            interactor1wholeparttype=noneadapter(entry.interactor1wholeparttype),1049            interactor1number=noneadapter(entry.interactor1number),1050            interactor1stage=noneadapter(entry.interactor1stage),1051            interactor1temp=noneadapter(entry.interactor1temp),1052            interactor1tempunit=noneadapter(entry.interactor1tempunit),1053            interactor1tempmethod=noneadapter(entry.interactor1tempmethod),1054            interactor1growthtemp=noneadapter(entry.interactor1growthtemp),1055            interactor1growthtempunit=noneadapter(entry.interactor1growthtempunit),1056            interactor1growthdur=noneadapter(entry.interactor1growthdur),1057            interactor1growthdurunit=noneadapter(entry.interactor1growthdurunit),1058            interactor1growthtype=noneadapter(entry.interactor1growthtype),1059            interactor1acc=noneadapter(entry.interactor1acc),1060            interactor1acctemp=noneadapter(entry.interactor1acctemp),1061            interactor1acctempnotes=noneadapter(entry.interactor1acctempnotes),1062            interactor1acctime=noneadapter(entry.interactor1acctime),1063            interactor1acctimenotes=noneadapter(entry.interactor1acctimenotes),1064            interactor1acctimeunit=noneadapter(entry.interactor1acctimeunit),1065            interactor1origtemp=noneadapter(entry.interactor1origtemp),1066            interactor1origtempnotes=noneadapter(entry.interactor1origtempnotes),1067            interactor1origtime=noneadapter(entry.interactor1origtime),1068            interactor1origtimenotes=noneadapter(entry.interactor1origtimenotes),1069            interactor1origtimeunit=noneadapter(entry.interactor1origtimeunit),1070            interactor1equilibtimevalue=noneadapter(entry.interactor1equilibtimevalue),1071            interactor1equilibtimeunit=noneadapter(entry.interactor1equilibtimeunit),1072            interactor1size=noneadapter(entry.interactor1size),1073            interactor1sizeunit=noneadapter(entry.interactor1sizeunit),1074            
interactor1sizetype=noneadapter(entry.interactor1sizetype),1075            interactor1sizesi=noneadapter(entry.interactor1sizesi),1076            interactor1sizeunitsi=noneadapter(entry.interactor1sizeunitsi),1077            interactor1denvalue=noneadapter(entry.interactor1denvalue),1078            interactor1denunit=noneadapter(entry.interactor1denunit),1079            interactor1dentypesi=noneadapter(entry.interactor1dentypesi),1080            interactor1denvaluesi=noneadapter(entry.interactor1denvaluesi),1081            interactor1denunitsi=noneadapter(entry.interactor1denunitsi),1082            interactor1massvaluesi=noneadapter(entry.interactor1massvaluesi),1083            interactor1massunitsi=noneadapter(entry.interactor1massunitsi),1084            interactor2=noneadapter(entry.interactor2),1085            interactor2common=noneadapter(entry.interactor2common),1086            interactor2stage=noneadapter(entry.interactor2stage),1087            interactor2temp=noneadapter(entry.interactor2temp),1088            interactor2tempunit=noneadapter(entry.interactor2tempunit),1089            interactor2tempmethod=noneadapter(entry.interactor2tempmethod),1090            interactor2growthtemp=noneadapter(entry.interactor2growthtemp),1091            interactor2growthtempunit=noneadapter(entry.interactor2growthtempunit),1092            interactor2growthdur=noneadapter(entry.interactor2growthdur),1093            interactor2growthdurunit=noneadapter(entry.interactor2growthdurunit),1094            interactor2growthtype=noneadapter(entry.interactor2growthtype),1095            interactor2acc=noneadapter(entry.interactor2acc),1096            interactor2acctemp=noneadapter(entry.interactor2acctemp),1097            interactor2acctempnotes=noneadapter(entry.interactor2acctempnotes),1098            interactor2acctime=noneadapter(entry.interactor2acctime),1099            interactor2acctimenotes=noneadapter(entry.interactor2acctimenotes),1100            interactor2acctimeunit=noneadapter(entry.interactor2acctimeunit),1101            interactor2origtemp=noneadapter(entry.interactor2origtemp),1102            interactor2origtempnotes=noneadapter(entry.interactor2origtempnotes),1103            interactor2origtime=noneadapter(entry.interactor2origtime),1104            interactor2origtimenotes=noneadapter(entry.interactor2origtimenotes),1105            interactor2origtimeunit=noneadapter(entry.interactor2origtimeunit),1106            interactor2equilibtimevalue=noneadapter(entry.interactor2equilibtimevalue),1107            interactor2equilibtimeunit=noneadapter(entry.interactor2equilibtimeunit),1108            interactor2size=noneadapter(entry.interactor2size),1109            interactor2sizeunit=noneadapter(entry.interactor2sizeunit),1110            interactor2sizetype=noneadapter(entry.interactor2sizetype),1111            interactor2sizesi=noneadapter(entry.interactor2sizesi),1112            interactor2sizeunitsi=noneadapter(entry.interactor2sizeunitsi),1113            interactor2denvalue=noneadapter(entry.interactor2denvalue),1114            interactor2denunit=noneadapter(entry.interactor2denunit),1115            interactor2dentypesi=noneadapter(entry.interactor2dentypesi),1116            interactor2denvaluesi=noneadapter(entry.interactor2denvaluesi),1117            interactor2denunitsi=noneadapter(entry.interactor2denunitsi),1118            interactor2massvaluesi=noneadapter(entry.interactor2massvaluesi),1119            interactor2massunitsi=noneadapter(entry.interactor2massunitsi),1120            
locationid=noneadapter(entry.locationid),1121            interactor1id=noneadapter(entry.interactor1id),1122            interactor2id=noneadapter(entry.interactor2id),1123            traitdescriptionid=noneadapter(entry.traitdescriptionid),1124            sourceinfoid=noneadapter(entry.sourceinfoid),1125            expcondid=noneadapter(entry.expcondid),1126            notes=noneadapter(entry.notes),1127        )1128    logger.debug("----- TABLE: maintable -----")1129    logger.debug("Inserted: {}".format(inscount))1130    logger.debug("----------------------------")1131    logger.info("Data upload complete.")1132    logger.info("Adding successful hash to db")1133    hashid = db2.dataset_hash.insert(filehash=filemd5,1134                                     filename=basename(csvpath)[:128])1135    logger.info(asciilogo())1136    return True1137def eod_upload_run(logger=False):1138    """Upload all validated datafiles, then move to 'complete' when complete or 'errored' when errored."""1139    import os1140    import time1141    import shutil1142    import logzero1143    if not logger:1144        logger = logzero.setup_logger(logfile="logs/vtuploads.log",1145                                      formatter=logging.Formatter(1146                                          '%(asctime)s - %(levelname)-7s - %(funcName)s - %(message)s'),1147                                      disableStderrLogger=True)1148        logger.info("Turned on logger in eod_upload_run")1149    success = True1150    # Load db connection1151    db2 = current.db21152    logger.info("Starting upload run at {}".format(time.strftime("%H:%M:%S", time.gmtime())))1153    # Find all files in validated and set up paths1154    cwd = os.getcwd()1155    logger.info("CWD: {}".format(cwd))     # Just to check that the cwd is where we think it is when the scheduler is run...1156    uploadpath = os.path.join(cwd, "applications/VectorBiteDataPlatform/uploads")1157    validatedpath = os.path.join(uploadpath, "validated")1158    successpath = os.path.join(uploadpath, "completed")1159    failpath = os.path.join(uploadpath, "errored")1160    templatepath = os.path.join(cwd, "applications/VectorBiteDataPlatform/static/templates/vectraits_template.csv")1161    uploadfiles = [f for f in os.listdir(validatedpath) if os.path.isfile(os.path.join(validatedpath, f))]1162    logger.debug("Files to upload: {}".format(uploadfiles))1163    # Run upload_vectraits_dataset on each file1164    for candidatefile in uploadfiles:1165        candidatepath = os.path.join(validatedpath, candidatefile)1166        logger.info("Starting upload of {}".format(candidatepath))1167        uploadsuccess = False1168        try:1169            uploadsuccess = upload_vectraits_dataset(candidatepath, templatepath, logger)1170            db2.commit()1171        except VTUploadError:1172            logger.exception("Handled error encountered when processing {}".format(candidatefile))1173            success = False1174            db2.rollback()1175        except Exception:1176            logger.exception("Unhandled error encountered when processing {}".format(candidatefile))1177            success = False1178            db2.rollback()1179        try:1180            if uploadsuccess:1181                logger.info("Upload successful")1182                shutil.move(candidatepath, os.path.join(successpath, candidatefile))1183            else:1184                logger.info("Upload failed")1185                shutil.move(candidatepath, os.path.join(failpath, candidatefile))1186        
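        # Note (editorial assumption): shutil.move falls back to copy-then-delete
        # when source and destination sit on different filesystems, which is a
        # common cause of the IOError handled below; on failure the file is
        # deliberately left in 'validated' so the next run can retry it.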
        except IOError:
            logger.exception("File move failed, remains in validated folder.")
    logger.info("Completed upload run at {}".format(time.strftime("%H:%M:%S", time.gmtime())))
    return success


if __name__ == "__main__":
    # Run testing code with full debug logging to a temporary logfile
    import time
    moduledebug = True
    if moduledebug:
        import logzero
        logger = logzero.setup_logger(logfile="/tmp/vtfuncsdebug.log",
                                      formatter=logging.Formatter(
                                          '%(asctime)s - %(levelname)-7s - %(funcName)s - %(message)s'),
                                      level=logging.DEBUG
                                      )
    else:
        logger = logging.getLogger("web2py.app.vbdp")
    logger.info("Test commencing at {}".format(time.strftime("%H:%M:%S", time.gmtime())))
    # upload_vectraits_dataset("applications/VectorBiteDataPlatform/uploads/tests/passing_superlong.csv",
    #                          "applications/VectorBiteDataPlatform/static/templates/vectraits_template.csv")
    eod_upload_run(logger)
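noneadapter() is applied to every value before insertion; its definition sits earlier in vtfuncs.py and is not part of this excerpt. A minimal sketch of what such an adapter typically has to do when moving pandas rows into a database (the NaN and blank-string rules below are illustrative assumptions, not the project's actual code):

import pandas as pd

def noneadapter_sketch(value):
    """Map pandas missing values (and blank strings) to None, i.e. SQL NULL."""
    # pd.isnull covers both None and float('nan') for scalar inputs
    if pd.isnull(value):
        return None
    # Treating whitespace-only strings as missing is an assumption here
    if isinstance(value, str) and not value.strip():
        return None
    return value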
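The cwd check and the docstring both suggest eod_upload_run is meant to be driven by the web2py scheduler. One way to queue it as a recurring task, assuming a standard Scheduler has been set up in a model file (the task name, period, and timeout are illustrative, not taken from this project):

from gluon.scheduler import Scheduler

scheduler = Scheduler(db)                         # normally done once, in a model file
scheduler.queue_task(eod_upload_run,
                     task_name='eod_upload_run',  # illustrative name
                     period=86400,                # run once a day
                     timeout=3600,                # allow long uploads to finish
                     repeats=0)                   # 0 = repeat indefinitely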
db.py
Source:db.py
# -*- coding: utf-8 -*-

#########################################################################
## Written by C. S. Schroeder, A Theory of Publishing
## Copyright (C) 2011 Equimind Financial LLC.
##
## Permission is hereby granted, free of charge, to any
## person obtaining a copy of this software and associated
## documentation files (the "Software"), to deal in the
## Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish,
## distribute, sublicense, and/or sell copies of the
## Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice
## shall be included in all copies or substantial portions of
## the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
## KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
## WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
## PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
## OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
## OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
## SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
##
##  Powered by web2py, Thanks Massimo!
#########################################################################

if not request.env.web2py_runtime_gae:
    ## if NOT running on Google App Engine use SQLite or other DB
    db = DAL('sqlite://storage.sqlite')
else:
    ## connect to Google BigTable (optional 'google:datastore://namespace')
    db = DAL('google:datastore')
    ## store sessions and tickets there
    session.connect(request, response, db=db)
    ## or store session in Memcache, Redis, etc.
    ## from gluon.contrib.memdb import MEMDB
    ## from google.appengine.api.memcache import Client
    ## session.connect(request, response, db = MEMDB(Client()))

## by default give a view/generic.extension to all actions from localhost
## none otherwise. a pattern can be 'controller/function.extension'
response.generic_patterns = ['*'] if request.is_local else []
T.current_languages = ['en', 'en-en']
# T.force('es-es')

#########################################################################
## Here is sample code if you need for
## - email capabilities
## - authentication (registration, login, logout, ...)
## - authorization (role based authorization)
## - services (xml, csv, json, xmlrpc, jsonrpc, amf, rss)
## - old style crud actions
## (more options discussed in gluon/tools.py)
#########################################################################

from gluon.tools import Auth, Crud, Service, PluginManager, prettydate
auth = Auth(db, hmac_key=Auth.get_or_create_key())
crud, service, plugins = Crud(db), Service(), PluginManager()

## create all tables needed by auth if not custom tables
auth.define_tables()

if db(db.auth_group.role == 'admin').count() < 1:
    db.auth_group.insert(role='admin', description="For administration of members")
if db(db.auth_group.role == 'reviewer').count() < 1:
    db.auth_group.insert(role='reviewer', description="For submitting reviews")

## user must be admin of application
## configure email
mail = auth.settings.mailer
## configure auth policy
auth.settings.registration_requires_verification = False
auth.settings.registration_requires_approval = False
auth.settings.reset_password_requires_verification = True

## if you need to use OpenID, Facebook, MySpace, Twitter, Linkedin, etc.
## register with janrain.com, write your domain:api_key in private/janrain.key
## from gluon.contrib.login_methods.rpx_account import use_janrain
## use_janrain(auth, filename='private/janrain.key')

#########################################################################
## Define your tables below (or better in another model file) for example
##
## >>> db.define_table('mytable', Field('myfield', 'string'))
##
## Fields can be 'string','text','password','integer','double','boolean'
##       'date','time','datetime','blob','upload', 'reference TABLENAME'
## There is an implicit 'id integer autoincrement' field
## Consult manual for more options, validators, etc.
##
## More API examples for controllers:
##
## >>> db.mytable.insert(myfield='value')
## >>> rows = db(db.mytable.myfield == 'value').select(db.mytable.ALL)
## >>> for row in rows: print row.id, row.myfield
#########################################################################


db.define_table('reviewer',
                Field('screenname', 'string', length=30, requires=IS_LENGTH(30, 2)),
                Field('userid', db.auth_user),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('city', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('state', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('country', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('loves', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('likes', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('dislikes', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('about_me', 'text'))

db.define_table('region',
                Field('name', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('blurb', 'string', length=500, requires=IS_LENGTH(500, 2)),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('city', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('state', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('country', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('product_categories', 'string', length=400, requires=IS_LENGTH(400, 2)),
                Field('product_lines', 'string', length=400, requires=IS_LENGTH(400, 2)),
                Field('famous_for', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('about', 'text'))

db.define_table('place',
                Field('name', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('blurb', 'string', length=500, requires=IS_LENGTH(500, 2)),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('region', db.region, required=True, requires=IS_IN_DB(db, 'region.id', '%(name)s')),
                Field('type', 'string', length=25, requires=IS_IN_SET([T('computing'), T('audio-visual'), T('mobile')])),
                Field('description', 'string', length=400, requires=IS_LENGTH(400, 2)),
                Field('cost', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('about', 'text'))

db.define_table('event',
                Field('name', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('blurb', 'string', length=500, requires=IS_LENGTH(500, 2)),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('region', db.region, required=True, requires=IS_IN_DB(db, 'region.id', '%(name)s')),
                Field('place', db.place, requires=IS_NULL_OR(IS_IN_DB(db, 'place.id', '%(name)s'))),
                Field('description', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('cost', 'string', length=200, requires=IS_LENGTH(200, 2)),
                Field('about', 'text'))


class RatingWidget(SQLFORM.widgets.options):
    """Render an integer rating field as a row of radio buttons from low to high."""
    @staticmethod
    def widget(field, value, **attributes):
        attr = SQLFORM.widgets.options._attributes(field, {}, **attributes)

        if isinstance(field.requires, IS_NULL_OR) \
                and hasattr(field.requires.other, 'options'):
            # Optional field: offer an extra blank radio button first
            opts = [TD(INPUT(_type='radio', _name=field.name,
                             _value='', value=value), '')]
            options = field.requires.other.options()
        elif hasattr(field.requires, 'options'):
            opts = []
            options = field.requires.options()
        else:
            raise SyntaxError('widget cannot determine options of %s' % field)
        opts += [TD(INPUT(_type='radio', _name=field.name,
                          _value=k, value=value), '') for (k, v) in options]
        return TABLE(TR(*(['low'] + opts + ['high'])), **attr)


import datetime

db.define_table('region_review',
                Field('ref_id', db.region, required=True, requires=IS_IN_DB(db, 'region.id', '%(name)s')),
                Field('blurb', 'string', length=500, requires=IS_LENGTH(500, 2)),
                Field('author', db.reviewer, requires=IS_IN_DB(db, 'reviewer.id', "%(screenname)s")),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('photo1', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('photo2', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('title', 'string', length=100, requires=IS_LENGTH(100, 2)),
                Field('subject', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('the_good', 'string', length=250, requires=IS_LENGTH(250, 2)),
                Field('the_bad', 'string', length=250, requires=IS_LENGTH(250, 2)),
                Field('date', 'datetime', default=datetime.datetime.now()),
                Field('text', 'text'),
                Field('rating', 'integer', default=1, requires=IS_IN_SET([1, 2, 3, 4, 5]), widget=RatingWidget.widget),
                Field('publish', 'boolean', default=True)
                )

db.define_table('place_review',
                Field('ref_id', db.place, required=True, requires=IS_IN_DB(db, 'place.id', '%(name)s')),
                Field('blurb', 'string', length=500, requires=IS_LENGTH(500, 2)),
                Field('author', db.reviewer, requires=IS_IN_DB(db, 'reviewer.id', "%(screenname)s")),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('photo1', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('photo2', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('title', 'string', length=100, requires=IS_LENGTH(100, 2)),
                Field('subject', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('the_good', 'string', length=250, requires=IS_LENGTH(250, 2)),
                Field('the_bad', 'string', length=250, requires=IS_LENGTH(250, 2)),
                Field('date', 'datetime', default=datetime.datetime.now()),
                Field('text', 'text'),
                Field('rating', 'integer', default=1, requires=IS_IN_SET([1, 2, 3, 4, 5]), widget=RatingWidget.widget),
                Field('publish', 'boolean', default=True)
                )

db.define_table('event_review',
                Field('ref_id', db.event, required=True, requires=IS_IN_DB(db, 'event.id', '%(name)s')),
                Field('blurb', 'string', length=500, requires=IS_LENGTH(500, 2)),
                Field('author', db.reviewer, requires=IS_IN_DB(db, 'reviewer.id', "%(screenname)s")),
                Field('photo', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('photo1', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('photo2', 'upload', requires=[IS_IMAGE(), IS_LENGTH(130000, 1024)]),
                Field('title', 'string', length=100, requires=IS_LENGTH(100, 2)),
                Field('subject', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('the_good', 'string', length=250, requires=IS_LENGTH(250, 2)),
                Field('the_bad', 'string', length=250, requires=IS_LENGTH(250, 2)),
                Field('type', 'string', length=25, requires=IS_LENGTH(25, 2)),
                Field('date', 'datetime', default=datetime.datetime.now()),
                Field('text', 'text'),
                Field('rating', 'integer', default=1, requires=IS_IN_SET([1, 2, 3, 4, 5]), widget=RatingWidget.widget),
                Field('publish', 'boolean', default=True)
                )


#############
## The following is open-source code modified for this application.
## Taken from the "simple comments" web2py plugin.
## No licensing applicable
#############

db.define_table('plugin_simple_comments_comment',
                Field('tablename',
                      writable=False, readable=False),
                Field('record_id', 'integer',
                      writable=False, readable=False),
                Field('body', 'text', requires=[IS_NOT_EMPTY(), IS_LENGTH(1000, 1)], label=T('Your comment'), widget=SQLFORM.widgets.text.widget),
                Field('created_by', db.auth_user, default=auth.user_id,
                      readable=False, writable=False),
                Field('created_on', 'datetime', default=datetime.datetime.now(),
...

test_predicates.py
Source:test_predicates.py  
...
def test_unit_predicates():
    assert not is_(0)(1)
    assert (is_(0) | is_(1))(1)
    assert (is_none | is_(0))(0)
    assert is_length(1 * units.cm)
    assert is_length(30.0 * units.mm)
...
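This test composes small predicates (is_, is_none, is_length) with the | operator. The library's own implementation is not shown on this page; a minimal, self-contained sketch of how such combinable predicates can be built (all names here are hypothetical, not this project's API):

class Predicate(object):
    """Wrap a one-argument boolean function so predicates compose with |."""
    def __init__(self, func):
        self.func = func

    def __call__(self, value):
        return self.func(value)

    def __or__(self, other):
        # The combined predicate passes if either side passes
        return Predicate(lambda value: self(value) or other(value))


def is_(expected):
    return Predicate(lambda value: value == expected)

is_none = Predicate(lambda value: value is None)

# Mirrors the composed assertions in the test above
assert (is_(0) | is_(1))(1)
assert (is_none | is_(0))(0)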
