Best Python code snippet using localstack_python
prepare_input.py
Source:prepare_input.py  
"""Functions responsible for preparing input."""
import os
from statistics import mean
from typing import List, Tuple, TypedDict, Union

from Bio.PDB import PDBParser, Polypeptide
import pyarrow
import tqdm
import vaex

from anu.constants.amino_acid import amino_acid
from anu.data.dataframe_operation import (
    read_dataframes_from_file,
    save_dataframe_to_file,
)

# Dictionary keys, in the row order used by the matrix built in build_matrix.
col_name = [
    "seq",
    "x_pos",
    "y_pos",
    "z_pos",
    "hydropathy",
    "hydropathy_index",
    "acidity_basicity",
    "mass",
    "isoelectric_point",
    "charge",
]

# Dataframe column suffixes, positionally matched with col_name
# (the three position columns are exposed as plain x / y / z).
_DF_COLUMN_SUFFIXES = [
    "seq",
    "x",
    "y",
    "z",
    "hydropathy",
    "hydropathy_index",
    "acidity_basicity",
    "mass",
    "isoelectric_point",
    "charge",
]

# Number of residue slots per protein; longer proteins are truncated.
PROTEIN_SEQ_MAX_LEN = 4000


class BuildMatrixDict(TypedDict):
    """Dictionary shape for build matrix class."""

    seq: List[List[int]]
    x_pos: List[List[int]]
    y_pos: List[List[int]]
    z_pos: List[List[int]]
    hydropathy: List[List[int]]
    hydropathy_index: List[List[int]]
    acidity_basicity: List[List[int]]
    mass: List[List[int]]
    isoelectric_point: List[List[int]]
    charge: List[List[int]]


def _residue_position(residue) -> Tuple[int, int, int]:
    """Return the rounded mean x, y and z coordinate of a residue's atoms."""
    vectors = [atom.get_vector() for atom in residue.get_atoms()]
    x = round(mean([vec[0] for vec in vectors]))
    y = round(mean([vec[1] for vec in vectors]))
    z = round(mean([vec[2] for vec in vectors]))
    return x, y, z


def build_matrix(
    path: str, filename: str, truncate_log: Union[tqdm.tqdm, None] = None
) -> BuildMatrixDict:
    """Build the input matrix for one protein.

    Every residue of the first model occupies one column. Residues that are
    not standard amino acids keep their zero-filled column. Amino acids past
    PROTEIN_SEQ_MAX_LEN columns are dropped and the truncation is reported
    through ``truncate_log``.

    Args:
        path: path of the pdb file.
        filename: name of the file (without extension).
        truncate_log: tqdm logger

    Returns:
        Build matrix dictionary; each value is a one-row pyarrow array.
    """
    protein_matrix = [[0] * PROTEIN_SEQ_MAX_LEN for _ in col_name]
    protein_structure = PDBParser().get_structure(filename, path)
    protein_model = list(protein_structure.get_models())
    residues = (
        residue
        for chain in protein_model[0].get_chains()
        for residue in chain.get_residues()
    )
    for col, residue in enumerate(residues):
        if not Polypeptide.is_aa(residue.get_resname(), standard=True):
            # Not an amino acid: the column is still consumed and stays 0.
            continue
        if col >= PROTEIN_SEQ_MAX_LEN:
            # Explicit bounds check instead of catching IndexError, so that
            # unrelated IndexErrors are not misreported as truncation.
            if truncate_log is not None:
                truncate_log.set_description_str(f"Protein {filename} is truncated.")
            break
        x, y, z = _residue_position(residue)
        # NOTE(review): Polypeptide.three_to_one may be unavailable in newer
        # Biopython releases - verify against the pinned version.
        code = Polypeptide.three_to_one(residue.get_resname())
        aa = amino_acid[code]
        column_values = (
            aa["code"],
            x,
            y,
            z,
            aa["hydropathy"],
            aa["hydropathy_index"],
            aa["acidity_basicity"],
            aa["mass"],
            aa["isoelectric_point"],
            aa["charge"],
        )
        for row, value in zip(protein_matrix, column_values):
            row[col] = value
    # Wrap each row in a one-element list so it loads as a single
    # list-valued cell of a vaex dataframe.
    return {name: pyarrow.array([row]) for name, row in zip(col_name, protein_matrix)}


def build_df_from_dic(
    protein_a: BuildMatrixDict,
    protein_b: BuildMatrixDict,
    interaction_type: Union[bool, None] = None,
) -> vaex.dataframe.DataFrame:
    """Build dataframe using protein dict.

    Args:
        protein_a: Protein A in the form of BuildMatrixDict.
        protein_b: Protein B in the form of BuildMatrixDict.
        interaction_type: boolean, true if protein interacts.

    Returns:
        vaex dataframe.
    """
    if interaction_type is None:
        interaction_array = [[]]
    elif interaction_type:
        interaction_array = pyarrow.array([[1, 0]])
    else:
        interaction_array = pyarrow.array([[0, 1]])
    # Columns alternate proteinA_* / proteinB_* per feature, then interaction.
    columns = {}
    for key, suffix in zip(col_name, _DF_COLUMN_SUFFIXES):
        columns[f"proteinA_{suffix}"] = protein_a[key]
        columns[f"proteinB_{suffix}"] = protein_b[key]
    columns["interaction"] = interaction_array
    return vaex.from_arrays(**columns)


def save_build_df(
    list_of_logs: List[tqdm.tqdm], df_base_path: str, save_path: str, index_path: str
) -> None:
    """Saving progress of build input from json.

    Closes the loggers, then merges chunk files ``0 .. last_index`` (where
    ``last_index`` is the integer stored in ``index_path``) and saves the
    result. If the index file is missing or does not hold an integer, no
    pair has been completed yet and nothing is saved.

    Args:
        list_of_logs: list of tqdm logs.
        df_base_path: path of last saved df.
        save_path: path to save new df.
        index_path: path of the file containing index information
    """
    for logger in list_of_logs:
        logger.clear()
        logger.close()
    try:
        with open(index_path) as fp:
            last_index = int(fp.read())
    except (OSError, ValueError):
        # Interrupted before the first pair finished: no chunks to merge.
        return
    # The index file stores the last completed row, so an index of 0 still
    # means chunk "0" exists and must be saved.
    expected_time = (last_index // 300) + 1
    print("Please don't press ctrl+c...")
    print("Trying to build and save dataframe...")
    print(f"On average machine, this will take around {expected_time}min")
    df = read_dataframes_from_file(
        [os.path.join(df_base_path, str(x)) for x in range(last_index + 1)]
    )
    save_dataframe_to_file(df, save_path)


def build_input_from_json_intermediate_step(
    protein_a: str,
    protein_b: str,
    pdb_file_path: str,
    current_log: tqdm.tqdm,
    truncate_log: tqdm.tqdm,
    interaction_type: bool,
) -> vaex.dataframe.DataFrame:
    """Intermediate step for build input from json.

    Args:
        protein_a: first protein id.
        protein_b: second protein id.
        pdb_file_path: root location of pdb files.
        current_log: tqdm logger for current status.
        truncate_log: tqdm logger for truncate status.
        interaction_type: interaction status of both protein

    Returns:
        vaex dataframe.
    """
    current_log.set_description_str(f"Processing  [{protein_a}, {protein_b}]")
    a = build_matrix(
        os.path.join(pdb_file_path, f"{protein_a}.pdb"), protein_a, truncate_log
    )
    b = build_matrix(
        os.path.join(pdb_file_path, f"{protein_b}.pdb"), protein_b, truncate_log
    )
    return build_df_from_dic(a, b, interaction_type)


def get_proteins_list_from_json(file_path: str) -> Tuple[List[str], List[str]]:
    """Get proteins list from json.

    The first two values of the top-level JSON object are returned, in file
    order.

    Args:
        file_path: path of the json file.

    Returns:
        Tuple of protein list.
    """
    import json

    with open(file_path) as fp:
        protein_json = json.load(fp)
    protein_list = list(protein_json.values())
    return protein_list[0], protein_list[1]


def build_input_from_json(
    path: str, db_name: str, filename: str, interaction_type: bool
) -> None:
    """Build input from json file.

    Processes protein pairs one by one, saving one dataframe chunk per pair
    and recording the last completed row so an interrupted run can resume.

    Args:
        path: path of json file.
        db_name: name of the database.
        filename: name of the output file containing df.
        interaction_type: boolean, true if protein interacts.
    """
    import warnings

    warnings.simplefilter("ignore")
    BASE_DATA_DIR = os.path.realpath(
        os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "data")
        )
    )
    file_path = os.path.join(BASE_DATA_DIR, "processed", "protein_id", path)
    pdb_file_path = os.path.join(BASE_DATA_DIR, "raw", "pdb")
    protein_list_a, protein_list_b = get_proteins_list_from_json(file_path)
    total = min(len(protein_list_a), len(protein_list_b))
    current_log = tqdm.tqdm(total=0, position=1, bar_format="{desc}", leave=False)
    truncate_log = tqdm.tqdm(total=0, position=2, bar_format="{desc}", leave=False)
    loggers = [current_log, truncate_log]
    # NOTE(review): these two paths are relative while the progress file is
    # under BASE_DATA_DIR - presumably the dataframe helpers resolve them
    # against the processed data directory; confirm.
    df_chunk_base_path = os.path.join("input", db_name, "df_chunks")
    save_df_path = os.path.join("input", db_name, filename)
    row_already_processed_path = os.path.join(
        BASE_DATA_DIR, "processed", "input", db_name, f"{filename}_processed_row.txt"
    )
    try:
        with open(row_already_processed_path) as fp:
            # The file holds the last *completed* row; resume with the next.
            start = int(fp.read()) + 1
    except (IOError, ValueError):
        start = 0
    try:
        progress_log = tqdm.tqdm(total=total, position=0, leave=False, unit="pairs")
        progress_log.update(start)
        loggers.append(progress_log)
        for i in range(start, total):
            try:
                df = build_input_from_json_intermediate_step(
                    protein_list_a[i],
                    protein_list_b[i],
                    pdb_file_path,
                    current_log,
                    truncate_log,
                    interaction_type,
                )
                save_dataframe_to_file(df, os.path.join(df_chunk_base_path, str(i)))
                # Record progress only after the chunk is safely written.
                with open(row_already_processed_path, "w") as fp:
                    fp.write(str(i))
                progress_log.update(1)
            except KeyboardInterrupt:
                break
        print("Completed...")
        save_build_df(
            loggers, df_chunk_base_path, save_df_path, row_already_processed_path
        )
    except KeyboardInterrupt:
        save_build_df(
            loggers, df_chunk_base_path, save_df_path, row_already_processed_path
        )
        # NOTE(review): the visible snippet is cut off ("...") right after the
        # argument list above; only the closing parenthesis was restored.
        # Anything that followed in the original file is not shown here.
Source:clear_log.py  
...18	if os.path.getsize(file_path) >= FILE_SIZE:19		return True20	else:21		return False22def truncate_log(abs_path, flist):23	os.chdir(abs_path)24	for f in flist:25		if os.path.isfile(f):26			# logging.info('compress start [%s %s]', abs_path, f)27			logging.info('compress start  [%s]', f)28			reduce_files(f)29			remove_files(f)30			logging.info('compress end [%s]',  f)31def ret_abs_dir(f_path):32	ret = {}33	fs = []34	for root, dirs, files in os.walk(f_path):35		for name in files:36			if not 'tar.gz' in name and '.log.' in name:37				#ret.append(os.path.join(root, name))38				logging.debug('walk to path %s [] file %s', root, name)39				fs.append(name)40				ret[root] = fs41	logging.debug('return dic is %s', ret)42	return ret43def remove_files(file_name):44	if os.path.isfile(file_name):45		os.remove(file_name)46#not use47def ret_file_size(f_list):48	for f in range(len(f_list)):49		if get_file_size(f_list[f]):50			truncate_log(f_list[f])51	return 052def run(file_dic):53	"""54	input: file_dic is dictory55	"""56	for k,v in file_dic.items():57		logging.debug("input dic key:%s value:%s", k, v)58		truncate_log(k,v)59def main():60	for i in LOG_DIR:61		all_files = ret_abs_dir(i)62		run(all_files)63		#ret_file_size(all_files)64if __name__ == '__main__':...sample.py
Source:sample.py  
...11                    help='If specified, database mist be closed first', dest='vacuum')12args = parser.parse_args()13log = v8log(args.db_file)14if args.to_date == '':15    log.truncate_log()16else:17    log.truncate_log(datetime.strptime(args.to_date, '%Y-%m-%d'))18if args.vacuum:
...Learn to execute automation testing from scratch with LambdaTest Learning Hub. From setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!
