Best Python code snippet using pandera_python
excel_splitter.py
Source:excel_splitter.py  
import os

import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
from openpyxl.styles.colors import Color

from utilities.utility_functions import (
    check_against_truth_threshold,
    generate_delimiters,
    remove_items_from_list,
)

# Fill colours (as reported by ``cell.fill.start_color.rgb``) that
# ``get_list_of_fills`` treats as marking a question row.
question_cell_colors = ['FFFBE4E4', 'FFFBE4E4', 'FFEEEDEC']
non_value_colors = ['00000000', 'FFC0C0C0']
# Row labels that ``clean_up_frame`` treats as value rows rather than responses.
value_storage = ['Sample Size', 'Mentions', '% Sample Size', 'Count', 'Column %', 'Mean', 'Median']
# Positional offset passed to ``df.iloc[...]`` in ``split_excel``.
first_row_of_data = 7
# Worksheet row (openpyxl indexing) read for the column headers in ``split_excel``.
column_header_row = 6


def add_formatting(cell, wrap=True, h_align='center', v_align='center', size=8, bold=False, cell_color=None,
                   float=True):
    """Apply alignment, font, border, optional fill and a number format to ``cell``.

    :param cell: openpyxl cell to style (modified in place).
    :param wrap: whether text wraps inside the cell.
    :param h_align: horizontal alignment name.
    :param v_align: vertical alignment name.
    :param size: font size.
    :param bold: whether the font is bold.
    :param cell_color: colour for a solid fill; ``None`` leaves the fill untouched.
    :param float: when true, values >= 1 use the ``'0.0'`` format, otherwise ``'0'``.
        (The name shadows the builtin but is kept for keyword-argument compatibility.)
    """
    cell.alignment = Alignment(horizontal=h_align, vertical=v_align, wrap_text=wrap)
    cell.font = Font(bold=bold, size=size)
    cell.border = Border(top=None, left=None, right=None, bottom=None)
    if cell_color is not None:
        # The fill has to be assigned to the cell; constructing it alone has no effect.
        cell.fill = PatternFill(fill_type='solid', fgColor=cell_color)
    try:
        if cell.value < 1:
            cell.number_format = '0%'
        else:
            cell.number_format = '0.0' if float else '0'
    except TypeError:
        # Values that cannot be compared with a number (text, None) keep their format.
        pass


def create_mergable_list(lst):
    """Return a copy of ``lst`` where every ``None`` is replaced by the last non-``None`` value.

    Leading ``None`` entries stay ``None`` because there is nothing to carry forward yet.
    """
    most_recent_value = None
    filled = []
    for item in lst:
        if item is not None:
            most_recent_value = item
        filled.append(most_recent_value)
    return filled


def move_worksheet(wb, worksheet_to_move, new_position):
    """Move the sheet named ``worksheet_to_move`` to index ``new_position`` in ``wb``."""
    order = list(range(len(wb.sheetnames)))
    popped = order.pop(wb.sheetnames.index(worksheet_to_move))
    order.insert(new_position, popped)
    # openpyxl keeps the sheet order in the private ``_sheets`` list.
    wb._sheets = [wb._sheets[i] for i in order]


def create_data_worksheets(file, data_dict):
    """Write each ``info['frame']`` in ``data_dict`` to its own sheet (``T1``, ``T2``, ...) of ``file``."""
    with pd.ExcelWriter(file) as writer:
        for idx, info in enumerate(data_dict.values()):
            info['frame'].to_excel(
                writer,
                sheet_name=f'T{idx + 1}',
                startrow=3,
                index_label=False,
                engine='openpyxl',
            )


def general_format_all_cells(ws, row_start=4):
    """Apply the default formatting to every cell from ``row_start`` downwards."""
    for col in ws.iter_cols(min_row=row_start):
        for cell in col:
            add_formatting(cell)


def format_series_headers(ws, row_start=4, row_end=5):
    """Format rows ``row_start``..``row_end``; the first two cells of each column slice are bold."""
    for col in ws.iter_cols(min_row=row_start, max_row=row_end):
        for cell_idx, cell in enumerate(col):
            add_formatting(cell, h_align='center', v_align='bottom', bold=cell_idx < 2)


def format_bases(ws, row_start=6, row_end=6):
    """Format rows ``row_start``..``row_end`` without a decimal place."""
    for col in ws.iter_cols(min_row=row_start, max_row=row_end):
        for cell in col:
            add_formatting(cell, h_align='center', v_align='center', float=False)


def format_category_headers(ws, is_multiindex=False):
    """Left-align and embolden column A (and column B when ``is_multiindex``)."""
    for col in ws.iter_cols(min_col=1, max_col=2 if is_multiindex else 1):
        for cell in col:
            add_formatting(cell, h_align='left', v_align='center', bold=True)


def add_and_format_defaults(ws, question, is_multiindex):
    """Write the fixed cells (question title, ``'bar'``, ``'*Sort'``, ``'Base'``) and format them."""
    defaults = {
        'A1': {'val': question},
        'A3': {'val': 'bar'},
        'B3': {'val': '*Sort'},
        'B6' if is_multiindex else 'A6': {'val': 'Base'},
    }
    for idx, (place, contents) in enumerate(defaults.items()):
        ws[place] = contents['val']
        is_title = idx == 0  # the first entry is the question title in A1
        add_formatting(
            ws[place],
            h_align='right' if contents['val'] == 'Base' else 'left',
            wrap=False,
            bold=is_title,
            size=12 if is_title else 8,
        )


def adjust_widths(ws, columns, width=20):
    """Set the width of each column letter in ``columns`` to ``width``."""
    for column in columns:
        ws.column_dimensions[column].width = width


def adjust_heights(ws):
    """Set the height of row 4 and of every row from 6 to the last row to 25."""
    ws.row_dimensions[4].height = 25
    for row in range(6, ws.max_row + 1):
        ws.row_dimensions[row].height = 25


def preselect_data(ws, is_multiindex):
    """Fill a few anchor cells and the leading data columns with theme colours."""
    colors = {
        'A1': Color(theme=4),
        'C6' if is_multiindex else 'B6': Color(theme=5),
        'A8' if is_multiindex else 'B5': Color(theme=7),
    }
    data_selection = Color(theme=7)
    for cell, color in colors.items():
        ws[cell].fill = PatternFill("solid", fgColor=color)
    for col in ws.iter_cols(min_row=8 if is_multiindex else 7, max_col=3 if is_multiindex else 2):
        for cell in col:
            cell.fill = PatternFill("solid", fgColor=data_selection)


def format_data_worksheets(wb, data_dict):
    """Format every data sheet of ``wb``, pairing sheets with ``data_dict`` keys in order."""
    for sheet, question in zip(wb.sheetnames, data_dict):
        is_multiindex = isinstance(data_dict[question]['frame'].index, pd.MultiIndex)
        ws = wb[sheet]
        if not is_multiindex:
            ws.delete_rows(7)
        general_format_all_cells(ws)
        format_series_headers(ws)
        format_bases(ws)
        format_category_headers(ws, is_multiindex)
        add_and_format_defaults(ws, question, is_multiindex)
        adjust_widths(ws, ['A', 'B'] if is_multiindex else ['A'])
        adjust_heights(ws)
        preselect_data(ws, is_multiindex)


def create_contents_page(wb, entries):
    """Add a ``Contents`` sheet listing ``T<n>`` / entry pairs and move it to the front."""
    sheet = wb.create_sheet(title='Contents')
    sheet.append(['Contents'])
    for q_idx, question in enumerate(entries):
        sheet.append([f'T{q_idx + 1}', question])
    move_worksheet(wb, 'Contents', 0)


def create_new_workbook(file_name, data_dict):
    """Write ``data_dict`` to ``<file_name stem>_output<ext>`` and format the result."""
    # splitext instead of str.replace: a name without the literal '.xlsx' would
    # otherwise map onto itself and the input workbook would be overwritten.
    root, ext = os.path.splitext(file_name)
    output_file_name = f"{root}_output{ext or '.xlsx'}"
    create_data_worksheets(output_file_name, data_dict)
    wb = load_workbook(output_file_name)
    print(wb.loaded_theme)
    format_data_worksheets(wb, data_dict)
    create_contents_page(wb, data_dict.keys())
    wb.save(output_file_name)


def clean_up_questions(questions, question_indices):
    """Drop questions that are too similar to the question immediately before them.

    Consecutive questions are compared word by word; when
    ``check_against_truth_threshold`` accepts the comparison at 0.75 the later
    question and its index are removed.

    :return: ``(questions, question_indices)`` with rejected entries removed;
        both lists are filtered by position so they stay aligned.
    """
    if not questions:
        return list(questions), list(question_indices)
    rejected_positions = set()
    most_recent_question = questions[0].split()
    for q_idx, (question, _) in enumerate(zip(questions, question_indices)):
        if q_idx == 0:
            continue
        q_split = question.split()
        similarity_check = [x == y for x, y in zip(most_recent_question, q_split)]
        if check_against_truth_threshold(similarity_check, 0.75):
            rejected_positions.add(q_idx)
        most_recent_question = q_split
    questions = [q for pos, q in enumerate(questions) if pos not in rejected_positions]
    question_indices = [i for pos, i in enumerate(question_indices) if pos not in rejected_positions]
    return questions, question_indices


def comparative_separation(text, text_to_compare_against):
    """Split ``text`` into the words matching ``text_to_compare_against`` position-wise and the rest.

    :return: ``(new_text, excluded_text)``; words beyond the end of the
        comparison text count as non-matching.
    """
    text_list = text.split()
    comparative_list = text_to_compare_against.split()
    # zip stops at the shorter list, so a longer ``text`` no longer raises IndexError.
    new_text_list = [x for x, other in zip(text_list, comparative_list) if x == other]
    new_text = ' '.join(new_text_list)
    excluded_text = ' '.join([y for y in text_list if y not in new_text_list])
    # NOTE(review): a one-character remainder is discarded while an empty one is kept
    # as ''; preserved from the original -- confirm this is intended.
    excluded_text = None if len(excluded_text) == 1 else excluded_text
    return new_text, excluded_text


def clean_up_question(question):
    """Split a raw question label into ``prefix``, ``question`` and optional ``statement``.

    Every delimiter from ``generate_delimiters(suffix='\\t')`` is first replaced by
    ``': '``; the label is then split on ``': '``.

    :raises IndexError: if the label has fewer than two components.
    """
    for od in generate_delimiters(suffix='\t'):
        question = question.replace(od, ': ')
    component_lst = [x.strip() for x in question.split(': ')]
    clean_values = {'prefix': component_lst[0], 'question': component_lst[1]}
    clean_values['statement'] = component_lst[2] if len(component_lst) > 2 else None
    return clean_values


def clean_up_frame(df, statement=None):
    """Index ``df`` by its first column and keep only the percentage/mean/median rows.

    :param df: frame whose first column holds the row labels.
    :param statement: when given, the result gets a ``(Statement, Response)``
        MultiIndex; otherwise it is indexed by ``Responses``.
    """
    df = df.set_index(df.columns[0])
    row_labels = df.index.tolist()
    value_row_names = [x for x in value_storage if x in row_labels]
    value_row_titles = [x for x in value_row_names if '%' in x or x in ['Median', 'Mean']]
    responses = remove_items_from_list(row_labels, value_row_names)
    responses = value_row_titles if len(responses) == 0 else responses
    # Copy so the column assignments below act on an independent frame, not a slice.
    df = df[df.index.isin(value_row_titles)].copy()
    if statement is not None:
        statement_array = [statement for _ in responses]
        df['Statement'] = statement_array
        df.index = pd.MultiIndex.from_arrays([statement_array, responses], names=('Statement', 'Response'))
    else:
        df['Responses'] = responses
        df = df.set_index('Responses', drop=True)
        df = df[[x for x in df.columns if x != 'Responses']]
    return df


def get_list_of_fills(ws):
    """Return the 0-based row positions whose column-A fill colour is in ``question_cell_colors``."""
    return [
        row_idx
        for row_idx, row in enumerate(ws.iter_rows(max_col=1))
        for cell in row
        if cell.fill.start_color.rgb in question_cell_colors
    ]


def df_question_scrubber(df):
    """Strip the leading words shared by all index labels of ``df``.

    :return: ``(df, new_q)`` where ``new_q`` is the shared prefix, or ``None``
        when there is none.
    :raises AttributeError: if the index labels are not strings (e.g. a MultiIndex).
    """
    index = df.index.tolist()
    if not index:
        return df, None
    split_qs = [x.split() for x in index]
    split_idx = 0
    for group_idx, group in enumerate(zip(*split_qs)):
        if len(set(group)) != 1:
            split_idx = group_idx
            break
    new_index = [' '.join(x[split_idx:]) for x in split_qs]
    # One mapping, applied once: renaming label by label could rename a row
    # again when a shortened label equals another original label.
    df = df.rename(index=dict(zip(index, new_index)))
    new_q = ' '.join(split_qs[0][:split_idx])
    return df, (new_q if new_q else None)


def data_cleanup(data_dict):
    """Re-key ``data_dict`` by the shared prefix found by ``df_question_scrubber``, when there is one."""
    temp_sheets = {}
    for question, info in data_dict.items():
        try:
            info['frame'], new_q = df_question_scrubber(info['frame'])
        except AttributeError:
            # Non-string index labels cannot be split; keep the frame as is.
            new_q = None
        temp_sheets[question if new_q is None else new_q] = info
    return temp_sheets


def split_excel(file_name):
    """
    Splits original df in to new dataframes to be shuffled and reorganized later
    """
    og_wb = load_workbook(filename=file_name)
    new_sheets = {}
    for sheet in og_wb.worksheets:
        headers = ['Question Values'] + [x.value for x in sheet[column_header_row]][1:]
        cat_headers = create_mergable_list([x.value for x in sheet[column_header_row - 1]][1:])
        cat_headers = [x.split(': ') for x in cat_headers]
        cat_headers = ['Question Values'] + [x[1] for x in cat_headers]
        df = pd.DataFrame(sheet.values)
        sample_size_indices = df.index[df[0] == 'Sample Size'].tolist()
        sample_sizes = df.loc[sample_size_indices[0]].values.tolist()
        question_indices = get_list_of_fills(sheet)
        questions = [x[0] for x in df.filter(items=question_indices, axis=0).values.tolist()]
        df = df.iloc[first_row_of_data:]
        df.columns = pd.MultiIndex.from_arrays([cat_headers, headers, sample_sizes])
        for index_idx, index in enumerate(question_indices):
            question_values = clean_up_question(questions[index_idx])
            if index_idx == len(question_indices) - 1:
                q_df = df.loc[(index + 1):]
            else:
                next_index = question_indices[index_idx + 1] - 1
                q_df = df.loc[(index + 1):next_index]
            q_df = clean_up_frame(q_df, question_values['statement'])
            question_key = question_values['question']
            if question_key in new_sheets:
                current_frame = new_sheets[question_key]['frame']
                # DataFrame.append was removed in pandas 2.0; concat is the replacement.
                new_sheets[question_key]['frame'] = pd.concat([current_frame, q_df])
            else:
                new_sheets[question_key] = {'frame': q_df, 'sample': sample_sizes}
    new_sheets = data_cleanup(new_sheets)
    # ... (the excerpt is truncated here on the source page; the next snippet is utils.py)
Source:utils.py  
...5    This function returns a list of all the tuples of the form (a_1, ..., a_p)6    with a_i between 1 and n-1. These tuples serve as multiindices for tensors.7    """8    return list(itertools.product(range(n), repeat=p))9def is_multiindex(multiindex, n, c_dimension):10    """11    This function determines if a tuple is a multiindex or not12    according to these rules:13    1. () is a multiindex of length 0 (i.e. if the covariant or contravariant dimension14    is 0, the empty tuple is the only 0-multiindex)15    2. The length of a multiindex must be equal to the c_dimension16    3. Each value in the multiindex varies between 0 and n-1.17    """18    if isinstance(multiindex, tuple):19        if len(multiindex) != c_dimension:20            return False21        for value in multiindex:22            if isinstance(value, int) or isinstance(value, float):23                if value < 0 or value >= n:24                    return False25            else:26                return False27        return True28    else:29        return False30def _get_matrix_of_basis_change(basis1, basis2, _dict, jacobian=True):31    """32    This is an internal function. It is used in the change_basis method33    for tensor objects. It computes the matrix that represents the34    identity function from (V, basis1) to (V, basis2). It does so35    using derivatives.36    For example, for the variables37    basis1 = [e0, e1, e2, e3]38    basis2 = [f0, f1, f2, f3]39    _dict = {40        e0: f0 + f1,41        e1: f1,42        e2: f1 + f3,43        e3: f244    }45    the resulting matrix would be46    [[1, 1, 0, 0],47     [0, 1, 0, 0],48     [0, 1, 0, 1],49     [0, 0, 1, 0]]50    if the jacobian keyword is set to True, and its transpose if51    it is false. 
The transpose works when you're trying to change52    basis in the algebraic sense (instead of the geometrical sense).53    """54    dim = len(basis1)55    L = sympy.zeros(dim)56    for i in range(dim):57        for j in range(dim):58            if jacobian == True:59                L[i, j] = _dict[basis1[i]].diff(basis2[j])60            if jacobian == False:61                L[i, j] = _dict[basis1[j]].diff(basis2[i])62    if L.det() == 0:63        raise ValueError("The transformation is not invertible.")64    return L65def _is_valid_key(key, dim, ct_dim, c_dim):66    """67    This is an internal function, it checks whether a given key (i.e. a pair68    of multiindices) is a valid key for certain dimension dim, contravariant dimension69    ct_dim and covariant dimension c_dim. It does so using the is_multiindex function.70    """71    if len(key) != 2:72        return False73    a, b = key74    if not is_multiindex(a, dim, ct_dim):75        return False76    if not is_multiindex(b, dim, c_dim):77        return False78    return True79def _dict_completer_for_tensor(_dict, _type, dim):80    """81    This function checks that the _dict is in proper form and completes in certain cases.82    Those cases are:83        - If one of the dimensions is 0, it is allowed to put only one multiindex instead84          of a pair.85        - if one of the dimensions is 1, it is allowd to put an integer instead of a86          1-multiindex.87    """88    ct_dim = _type[0]89    c_dim = _type[1]90    new_dict = {}91    if _dict == {}:92        new_dict = {93            (tuple(0 for i in range(ct_dim)), tuple(0 for i in range(c_dim))): 094        }95        return new_dict96    if ct_dim > 0 and c_dim == 0:97        for key in _dict:98            if _is_valid_key(key, dim, ct_dim, c_dim):99                new_dict[key] = _dict[key]100            elif ct_dim == 1 and isinstance(key, int):101                new_dict[((key,), ())] = _dict[key]102            elif is_multiindex(key, dim, 
ct_dim):103                new_dict[(key, ())] = _dict[key]104            else:105                raise ValueError(106                    "Can't extend key {} because it isn't a {}-multiindex".format(107                        key, ct_dim108                    )109                )110        return new_dict111    if ct_dim == 0 and c_dim > 0:112        for key in _dict:113            if _is_valid_key(key, dim, ct_dim, c_dim):114                new_dict[key] = _dict[key]115            elif c_dim == 1 and isinstance(key, int):116                new_dict[(), (key,)] = _dict[key]117            elif is_multiindex(key, dim, c_dim):118                new_dict[((), key)] = _dict[key]119            else:120                raise ValueError(121                    "Can't extend key {} because it isn't a {}-multiindex".format(122                        key, c_dim123                    )124                )125        return new_dict126    if ct_dim == 1 and c_dim > 0:127        for key in _dict:128            if _is_valid_key(key, dim, ct_dim, c_dim):129                new_dict[key] = _dict[key]130            elif len(key) == 2:131                i, b = key132                if isinstance(i, int) and isinstance(b, int):133                    new_dict[((i,), (b,))] = _dict[key]134                elif isinstance(i, int) and is_multiindex(b, dim, c_dim):135                    new_dict[(i,), b] = _dict[key]136                else:137                    raise ValueError(138                        "{} isn't an integer or {} isn't a {}-multiindex (or int).".format(139                            i, b, c_dim140                        )141                    )142            else:143                raise ValueError("There should only be two things in {}".format(key))144        return new_dict145    if ct_dim > 0 and c_dim == 1:146        for key in _dict:147            if _is_valid_key(key, dim, ct_dim, c_dim):148                new_dict[key] = _dict[key]149            elif len(key) == 
2:150                a, j = key151                if isinstance(a, int) and isinstance(j, int):152                    new_dict[(a,), (j,)] = _dict[key]153                elif is_multiindex(a, dim, ct_dim) and isinstance(j, int):154                    new_dict[a, (j,)] = _dict[key]155                else:156                    raise ValueError(157                        "{} should be an integer and {} should be a {}-multiindex (or int in case 1).".format(158                            j, a, ct_dim159                        )160                    )161        return new_dict162    for key in _dict:163        if not _is_valid_key(key, dim, ct_dim, c_dim):164            raise ValueError("Key {} is not compatible with the dimensions")165    return _dict166def _symmetry_completer(_dict):167    new_dict = _dict.copy()...pandas.py
Source:pandas.py  
...12    :return:13    """14    # index should be of same type15    is_multiindex = lambda idx: isinstance(idx, pd.MultiIndex)16    if is_multiindex(df_a.index) != is_multiindex(df_b.index):17        return False18    # index dtypes should be the same19    get_multiindex_dtypes = lambda idx: [idx.get_level_values(i).dtype for i in range(len(idx.levels))]20    if is_multiindex(df_a.index):21        df_a_column_dtypes = get_multiindex_dtypes(df_a.index)22        df_b_column_dtypes = get_multiindex_dtypes(df_b.index)23        if df_a_column_dtypes != df_b_column_dtypes:24            return False25    else:26        if df_a.index.dtype != df_b.index.dtype:27            return False28    # index names should match29    if df_a.index.names != df_b.index.names:30        return False31    # indexes should be disjunct in some cases32    if indices_must_be_disjunct and not df_a.index.intersection(df_b.index).empty:33        return False34    # column index should be of same type35    if is_multiindex(df_a.columns) != is_multiindex(df_b.columns):36        return False37    # index dtypes should be the same38    if is_multiindex(df_a.columns):39        df_a_column_dtypes = get_multiindex_dtypes(df_a.columns)40        df_b_column_dtypes = get_multiindex_dtypes(df_b.columns)41        if df_a_column_dtypes != df_b_column_dtypes:42            return False43    else:44        if df_a.columns.dtype != df_b.columns.dtype:45            return False46    # column names must match47    if df_a.columns.names != df_b.columns.names:48        return False...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
