Best Python code snippet using lemoncheesecake
feature_extraction_helpers.py
Source:feature_extraction_helpers.py  
1# Imports2import Cluster3import extract4import glob5import partition6import pickle7import re8import timeit9import Visualiser10import numpy as np11import math12from pysal.esda.getisord import G_Local13from pysal.weights.Distance import DistanceBand14from descartes import PolygonPatch15import matplotlib.pyplot as plt16from tqdm import tqdm17import seaborn as sb18from matplotlib import cm19from matplotlib.ticker import LinearLocator, FormatStrFormatter20from shapely.geometry import Point, Polygon, LineString21from mpl_toolkits.mplot3d import Axes3D22def get_tags(hierarchy_tags):23    """24    Gets the x-y coordinates of the piecewise line defining the invasive margin.25    """26    tags = []27    x_tags, y_tags = [], []28    for tag in hierarchy_tags:29        if "X=" in tag and "Y=" in tag:30            x = tag.split("\"")[1]31            y = tag.split("\"")[3]32            x_tags.append(int(x))33            y_tags.append(int(y))34    for i, j in zip(x_tags, y_tags):35        tags.append((int(i), int(j)))36    return tags37def load_linestring(polygon_file):38    """39    Loads the x-y coordinates and creates a LineString object from them.40    This is useful for establishing the distance of cells from the41    invasive margin.42    """43    poly = None44    with open(polygon_file, "r") as f:45        hierarchy = f.read()46        p = re.compile("<*")47        hierarchy_tags = p.split(hierarchy)48        tags = get_tags(hierarchy_tags)49        poly = LineString(tags)50    if poly is None:51        raise Exception52    else:53        return poly54def load_shape(polygon_file):55    """56    Loads the x-y coordinates and creates a Polygon object from them.57    This is useful for establishing whether a given object is within58    the invasive margin or not.59    """60    poly = None61    with open(polygon_file, "r") as f:62        hierarchy = f.read()63        p = re.compile("<*")64        hierarchy_tags = p.split(hierarchy)65        tags = get_tags(hierarchy_tags)66        poly = Polygon(tags)67    if poly is None:68        raise Exception69    else:70        return poly71def plot_line_and_shape(ax, linestring, shape):72    """73    Given a matplotlib axes object, plots the Shapely LineString and Polygon74    objects onto the axes. Returns the axes.75    """76    patch = PolygonPatch(shape, fc="red", alpha=0.5, zorder=2)77    ax.add_patch(patch)78    xs = [x for (x, y) in linestring.coords]79    ys = [y for (x, y) in linestring.coords]80    ax.plot(xs, ys)81    return ax82def get_margin_cells(all_cells, linestring, my_distance):83    """84    Gets all cells that are within my_distance of the invasive margin.85    This includes TB, CD3, and CD8.86    """87    margin_cell_list = []88    assert (type(all_cells) == np.ndarray and all_cells.shape[1] >= 4), "type of all_cells is {}, all_cells.shape is {}".format(type(all_cells), all_cells.shape)89    x_avgs = (all_cells[:, 0] + all_cells[:, 1]) / 290    y_avgs = (all_cells[:, 2] + all_cells[:, 3]) / 291    xy_avgs = np.array((x_avgs, y_avgs)).transpose()92    for i in range(len(all_cells)):93        p = Point(xy_avgs[i])94        if p.distance(linestring) <= my_distance:95            margin_cell_list.append(all_cells[i])96    return np.asarray(margin_cell_list)97def get_core_cells(all_cells, shape, linestring, my_distance):98    """99    Gets all cells that are inside the invasive margin, but not within my_distance to it.100    This includes TB, CD3, and CD8.101    """102    core_cell_list = []103    assert (type(all_cells) == np.ndarray and all_cells.shape[1] >= 4), "type of all_cells is {}, all_cells.shape is {}".format(type(all_cells), all_cells.shape)104    x_avgs = (all_cells[:, 0] + all_cells[:, 1]) / 2105    y_avgs = (all_cells[:, 2] + all_cells[:, 3]) / 2106    xy_avgs = np.array((x_avgs, y_avgs)).transpose()107    for i in range(len(all_cells)):108        p = Point(xy_avgs[i])109        if p.distance(linestring) > my_distance and p.within(shape):110            core_cell_list.append(all_cells[i])111    return np.asarray(core_cell_list)112def get_margin_core_cells(all_cells, shape, linestring, my_distance):113    """114    Excludes all cells that are more than my_distance outside the115    invasive margin. This includes TB, CD3, and CD8.116    """117    margin_cell_list, core_cell_list = [], []118    assert (type(all_cells) == np.ndarray and all_cells.shape[1] >= 4), "type of all_cells is {}, all_cells.shape is {}".format(type(all_cells), all_cells.shape)119    x_avgs = (all_cells[:, 0] + all_cells[:, 1]) / 2120    y_avgs = (all_cells[:, 2] + all_cells[:, 3]) / 2121    xy_avgs = np.array((x_avgs, y_avgs)).transpose()122    for i in range(len(all_cells)):123        p = Point(xy_avgs[i])124        if p.distance(linestring) <= my_distance:125            margin_cell_list.append(all_cells[i])126        elif p.within(shape):127            core_cell_list.append(all_cells[i])128    return np.asarray(margin_cell_list), np.asarray(core_cell_list)129def get_lymphocytes_on_margin(cd3cd8, annotations_file):130    """131    Gets all lymphocytes (CD3/CD8) within 500um of the invasive margin.132    """133    all_cd3, all_cd8 = cd3cd8134    linestring = load_linestring(annotations_file)135    margin_cd3, margin_cd8 = [], []136    margin_cd3, margin_cd8 = np.asarray(get_margin_cells(all_cd3, linestring, 500)), np.asarray(get_margin_cells(all_cd8, linestring, 500))137    return margin_cd3, margin_cd8138def get_lymphocytes_on_margin_and_core(cd3cd8, annotations_file):139    """140    Gets all lymphocytes (CD3/CD8) except those that are more than 500um outside141    the invasive margin142    """143    all_cd3, all_cd8 = cd3cd8144    shape = load_shape(annotations_file)145    linestring = load_linestring(annotations_file)146    margin_cd3, core_cd3 = get_margin_core_cells(all_cd3, shape, linestring, 500)147    margin_cd8, core_cd8 = get_margin_core_cells(all_cd8, shape, linestring, 500)148    return np.asarray(margin_cd3), np.asarray(core_cd3), np.asarray(margin_cd8), np.asarray(core_cd8)149def partition_lymphocytes_for_heatmap(items, tile_size=886, to_list=True, input_type="clean",150                                      by_metric=True, scale=1):151    """152    Given appropriate parameters and a list of lymphocytes,153    divide them into a (T x T) grid and take the counts of each type154    of object for each tile. Return these 'heatmaps', rotated accordingly.155    """156    partitioned_items, _, _, _, _, _, _, _ = partition.partition(items, tile_size=tile_size,157                                                                 to_list=to_list, input_type=input_type,158                                                                 by_metric=by_metric, scale=scale)159    heatmap_cd3 = np.zeros(partitioned_items.shape, dtype=int)160    heatmap_cd8 = np.zeros(partitioned_items.shape, dtype=int)161    heatmap_cd3cd8 = np.zeros(partitioned_items.shape, dtype=int)162    (width, height) = partitioned_items.shape163    for col in range(width):164        for row in range(height):165            cells = partitioned_items[col][row]166            heatmap_cd3cd8[col][row] += len(cells)167            for cell in cells:168                if cell[4] > 0:169                    heatmap_cd3[col][row] += 1170                elif cell[5] > 0:171                    heatmap_cd8[col][row] += 1172    heatmap_cd3 = np.flip(np.rot90(heatmap_cd3), 0)173    heatmap_cd8 = np.flip(np.rot90(heatmap_cd8), 0)174    heatmap_cd3cd8 = np.flip(np.rot90(heatmap_cd3cd8), 0)175    return heatmap_cd3, heatmap_cd8, heatmap_cd3cd8176def partition_tb_for_heatmap(items, tile_size=886, to_list=True, input_type="clean",177                             by_metric=True, scale=1):178    """179    Given appropriate parameters and a list of tumour buds,180    divide them into a (T x T) grid and take the counts of tumour buds181    for each tile. Return this 'heatmap', rotated/flipped accordingly.182    """183    partitioned_tb, _, _, _, _, _, _, _ = partition.partition(items, tile_size=tile_size,184                                                              to_list=to_list, input_type=input_type,185                                                              by_metric=by_metric, scale=scale)186    heatmap_tb = np.zeros(partitioned_tb.shape, dtype=int)187    for i in range(partitioned_tb.shape[0]):188        for j in range(partitioned_tb.shape[1]):189            heatmap_tb[i][j] += len(partitioned_tb[i][j])190    heatmap_tb = np.flip(np.rot90(heatmap_tb), 0)191    return heatmap_tb192def partition_nearby_lymphocytes_tb_for_heatmap(items, tile_size=886, to_list=True, input_type="mixed",193                                                by_metric=True, scale=1):194    """195    Given appropriate parameters and a list of tumour buds and lymphocytes within 50um of any tumour bud,196    divide them into a (T x T) grid and take the counts of tumour buds and lymphocytes197    for each tile. Return these 'heatmaps', rotated accordingly.198    """199    partitioned_items, tiles, \200        lymph_w, lymph_h,     \201        clust_w, clust_h,     \202        x_step, y_step = partition.partition(items, tile_size=tile_size,203                                             to_list=to_list, input_type=input_type,204                                             by_metric=by_metric, scale=scale)205    counts = Cluster.get_lymphocyte_cluster_density_heatmap(partitioned_items, partitioned_items.shape, tiles,206                                                            lymph_w, lymph_h, clust_w, clust_h, x_step, y_step)207    heatmap_tb = np.zeros(counts.shape, dtype=int)208    heatmap_nearby_cd3 = np.zeros(counts.shape, dtype=int)209    heatmap_nearby_cd8 = np.zeros(counts.shape, dtype=int)210    heatmap_nearby_cd3cd8 = np.zeros(counts.shape, dtype=int)211    for i in range(counts.shape[0]):212        for j in range(counts.shape[1]):213            heatmap_nearby_cd3[i][j] += counts[i][j][0]214            heatmap_nearby_cd8[i][j] += counts[i][j][1]215            heatmap_nearby_cd3cd8[i][j] += counts[i][j][2]216            heatmap_tb[i][j] += counts[i][j][3]217    heatmap_nearby_cd3 = np.flip(np.rot90(heatmap_nearby_cd3), 0)218    heatmap_nearby_cd8 = np.flip(np.rot90(heatmap_nearby_cd8), 0)219    heatmap_nearby_cd3cd8 = np.flip(np.rot90(heatmap_nearby_cd3cd8), 0)220    heatmap_tb = np.flip(np.rot90(heatmap_tb), 0)221    return heatmap_tb, heatmap_nearby_cd3, heatmap_nearby_cd8, heatmap_nearby_cd3cd8222def get_cluster_code(cluster_name):223    """224    Path/to/file/.../ -> split on the slash and get the last token225    SN---.p -> just truncate the last 2 characters226    """227    filename = cluster_name.split('/')[-1]228    return filename[0:-2]229def get_annotations_code(annotations_name):230    """231    SN---_----_job----.annotations -> get the first 5 characters232    """233    filename = annotations_name.split('/')[-1]234    return filename[0:5]235def load_cluster_list(cluster_files):236    """237    Cluster file, load through pickle.238    """239    for cluster_filename in sorted(glob.glob(cluster_files)):240        cluster_data = pickle.load(open(cluster_filename, "rb"))241        yield cluster_filename, cluster_data242def load_lymphocyte_list(lymphocyte_files):243    """244    Lymphocyte file, load through csv.245    """246    for lymphocyte_filename in sorted(glob.glob(lymphocyte_files)):247        lymphocyte_data = extract.load(lymphocyte_filename)248        yield lymphocyte_filename, lymphocyte_data249def load_len_lymphocyte_list(lymphocyte_files):250    """251    Lymphocyte file, load through csv.252    """253    for lymphocyte_filename in sorted(glob.glob(lymphocyte_files)):254        lymphocyte_data = extract.load(lymphocyte_filename)255        yield lymphocyte_filename, len(lymphocyte_data)256def load_cluster_tb_metadata(cluster_metadata_files):257    """258    Cluster metadata file, load through .txt file.259    Get the appropriate line, get the val at the end.260    """261    for metadata_filename in tqdm(sorted(glob.glob(cluster_metadata_files))):262        f = open(metadata_filename, 'r')263        contents = f.readlines()264        cluster_count_val = -1265        tb_count_val = -1266        for line in contents:267            if "CLU" in line:268                cluster_count_val = int(line.split(": ")[-1])269            if "TB" in line:270                tb_count_val = int(line.split(": ")[-1])271        if cluster_count_val != -1 and tb_count_val != -1:272            yield metadata_filename, cluster_count_val, tb_count_val273        else:274            raise Exception("No cluster/TB value associated in file " + metadata_filename)275def load_lymphocyte_metadata(lymphocyte_metadata_files):276    """277    Lymphocyte metadata file, load through .txt file.278    Get the appropriate line, get the val at the end.279    """280    for metadata_filename in tqdm(sorted(glob.glob(lymphocyte_metadata_files))):281        f = open(metadata_filename, 'r')282        contents = f.readlines()283        cd3cd8_count_val = -1284        cd3_count_val = -1285        cd8_count_val = -1286        for line in contents:287            if "CD3&CD8" in line:288                cd3cd8_count_val = int(line.split(": ")[-1])289                continue290            if "CD3" in line:291                cd3_count_val = int(line.split(": ")[-1])292                continue293            if "CD8" in line:294                cd8_count_val = int(line.split(": ")[-1])295                continue296        if cd3cd8_count_val > -1 and cd3_count_val > -1 and cd8_count_val > -1:297            yield metadata_filename, cd3_count_val, cd8_count_val, cd3cd8_count_val298        else:299            raise Exception("No CD3/CD8/CD3CD8 value associated in file " + metadata_filename)300def load_margin(margin_files):301    """302    Margin defining shape of invasive margin, load through helpers.303    """304    for margin_filename in sorted(glob.glob(margin_files)):305        linestring, shape = load_linestring(margin_filename), load_shape(margin_filename)306        yield margin_filename, linestring, shape307def load_meta(obj_type, meta_data_files, code_extract_func):308    """309    meta_data_values = [(meta_code1, meta_val1), (meta_code2, meta_val2), ...]310    """311    meta_data_values = []312    for meta_data_filename in tqdm(sorted(glob.glob(meta_data_files))):313        f = open(meta_data_filename, 'r')314        contents = f.readlines()315        obj_count_val = -1316        for line in contents:317            if obj_type in line:318                obj_count_val = int(line.split(": ")[-1])319        meta_data_values.append((code_extract_func(meta_data_filename), obj_count_val))320    return meta_data_values321def code_extract_func(filename):322    return filename.split('/')[-1].split('.')[0]323def visualisation_block(visualise_func, data, linestring, shape, title, image_filename):324    fig, ax = plt.subplots(1, 1, figsize=(6, 6))325    ax = visualise_func(data, ax, title=title)326    xs = [x for (x, y) in linestring.coords]327    ys = [y for (x, y) in linestring.coords]328    patch = PolygonPatch(shape, fc="red", alpha=0.2, zorder=2)329    ax.add_patch(patch)330    ax.plot(xs, ys)331    fig.savefig(image_filename, dpi=100, bbox_inches="tight")332def visualisation_interaction_block(data, linestring, shape, title, image_filename):333    fig, ax = plt.subplots(1, 1, figsize=(6, 6))334    ax = Visualiser.visualise_clusters_and_lymphocytes_on_ax(data, ax, title=title)335    xs = [x for (x, y) in linestring.coords]336    ys = [y for (x, y) in linestring.coords]337    patch = PolygonPatch(shape, fc="red", alpha=0.2, zorder=2)338    ax.add_patch(patch)339    ax.plot(xs, ys)340    fig.savefig(image_filename, dpi=100, bbox_inches="tight")341def statistic_generation_block(partition_func, statistic_funcs, data, meta_data_value, d, heatmap_filename_func, target_directory_stub, filename):342    heatmap = partition_func(data, meta_data_value, d)343    assert heatmap.shape[2] == 4, "Output of transform should be 3D numpy array with 4 layers"  # Layers for TB, CD3, CD8, CD3CD8 respectively344    for (statistic_name, statistic_func, statistic_mask) in statistic_funcs:345        transformed_data = statistic_func(heatmap)346        for i, obj_type in enumerate(["TB_CD3", "TB_CD8", "TB_CD3CD8"]):347            heatmap_filename = heatmap_filename_func(target_directory_stub + obj_type + "_" + str(d) + "/", filename, statistic_name)348            p_file = open(heatmap_filename, "wb")349            pickle.dump(transformed_data[:, :, i+1], p_file)350            p_file.close()351            fig, ax = plt.subplots(1, 1, figsize=(6, 6))352            title = "Heatmap for " + target_directory_stub.split('/')[-1] + obj_type + " with d=" + str(d)353            ax = Visualiser.visualise_heatmaps(transformed_data[:, :, i+1], ax, title, statistic_mask(transformed_data[:, :, i+1]))354            image_filename = heatmap_filename_func(target_directory_stub + obj_type + "_IMAGES/", filename, "_heatmap" + "_" + str(d) + "_" + statistic_name)[0:-2] + ".png"355            fig.savefig(image_filename, dpi=100, bbox_inches="tight")356def get_images_with_margins(obj_type, obj_files, expected_num_files, meta_data_files, load_obj_func, target_directory_stub, convert_func, visualise_func, image_filename_func, partition_func, heatmap_filename_func, margin_files, d):357    meta_data_pairs = load_meta(obj_type, meta_data_files, code_extract_func)358    meta_data_codes = [code for (code, val) in meta_data_pairs]359    meta_data_values = [val for (code, val) in meta_data_pairs]360    num_files = 0361    obj_file_type = obj_type if obj_type != "CD3&CD8" else "CD3CD8"362    for (filename, raw_data), (margin_filename, linestring, shape) in zip(load_obj_func(obj_files), load_margin(margin_files)):363        meta_data_value = 0364        try:365            meta_data_value = meta_data_values[meta_data_codes.index(code_extract_func(filename))]366        except:367            raise Exception("Meta-data does not have a value for file " + filename + "\nwith obj_type " + obj_type)368        all_data = convert_func(raw_data, meta_data_value)369        assert type(all_data) == np.ndarray370        ## ALL371        print(code_extract_func(filename) + ": " + "All {}".format(obj_type))372        visualisation_block(visualise_func, all_data, linestring, shape, "All {}".format(obj_type), image_filename_func(target_directory_stub + "ALL_{}/".format(obj_file_type), filename))373        statistic_generation_block(partition_func, [("_density", density)], all_data, meta_data_value, [heatmap_filename_func(target_directory_stub + "ALL_{}/".format(obj_file_type), filename)])374        margin_data, core_data = get_margin_core_cells(all_data, shape, linestring, 500)375        assert margin_data.shape[0] > 0 and margin_data.shape[1] >= 4, "Something wrong with {} margin data".format(obj_type)376        assert core_data.shape[0] > 0 and core_data.shape[1] >= 4, "Something wrong with {} core data".format(obj_type)377        ## MARGIN378        print(code_extract_func(filename) + ": " + "{} (IM)".format(obj_type))379        visualisation_block(visualise_func, margin_data, linestring, shape, "{} (IM)".format(obj_type), image_filename_func(target_directory_stub + "MARGIN_{}/".format(obj_file_type), filename))380        statistic_generation_block(partition_func, [("_density", density)], margin_data, meta_data_value, [heatmap_filename_func(target_directory_stub + "MARGIN_{}/".format(obj_file_type), filename)])381        ## CORE382        print(code_extract_func(filename) + ": " + "{} (CT)".format(obj_type))383        visualisation_block(visualise_func, core_data, linestring, shape, "{} (CT)".format(obj_type), image_filename_func(target_directory_stub + "CORE_{}/".format(obj_file_type), filename))384        statistic_generation_block(partition_func, [("_density", density)], core_data, meta_data_value, [heatmap_filename_func(target_directory_stub + "CORE_{}/".format(obj_file_type), filename)])385        ## IMCT386        print(code_extract_func(filename) + ": " + "{} (IMCT)".format(obj_type))387        imct_data = np.concatenate((margin_data, core_data), axis=0)388        assert imct_data.shape[0] == (margin_data.shape[0] + core_data.shape[0]), "Margin and core cells not added properly"389        del(margin_data)  # numpy creates a copy on concat, so safe to delete390        del(core_data)    # these two arrays391        visualisation_block(visualise_func, imct_data, linestring, shape, "{} (IMCT)".format(obj_type), image_filename_func(target_directory_stub + "IMCT_{}/".format(obj_file_type), filename))392        statistic_generation_block(partition_func, [("_density", density)], imct_data, meta_data_value, [heatmap_filename_func(target_directory_stub + "IMCT_{}/".format(obj_file_type), filename)])393        num_files += 1394    assert(num_files == expected_num_files)395def get_images_with_interactions_condensed(tb_files, lym_files, expected_num_files, meta_data_files,396                                           target_directory_stub, heatmap_transform, statistics, image_filename_func,397                                           heatmap_filename_func, margin_files, d):398    tb_meta_pairs = load_meta("TB", meta_data_files, code_extract_func)399    tb_meta_codes = [code for (code, val) in tb_meta_pairs]400    tb_meta_values = [val for (code, val) in tb_meta_pairs]401    cd3_meta_pairs = load_meta("CD3", meta_data_files, code_extract_func)402    cd8_meta_pairs = load_meta("CD8", meta_data_files, code_extract_func)403    cd3cd8_meta_pairs = load_meta("CD3&CD8", meta_data_files, code_extract_func)404    cd3_meta_codes = [code for (code, val) in cd3_meta_pairs]405    cd8_meta_codes = [code for (code, val) in cd8_meta_pairs]406    cd3cd8_meta_codes = [code for (code, val) in cd3cd8_meta_pairs]407    assert(cd3_meta_codes == cd8_meta_codes and cd8_meta_codes == cd3cd8_meta_codes)408    cd3_meta_values = [val for (code, val) in cd3_meta_pairs]409    cd8_meta_values = [val for (code, val) in cd8_meta_pairs]410    cd3cd8_meta_values = [val for (code, val) in cd3cd8_meta_pairs]411    num_files = 0412    for (tb_filename, cluster_raw_data), (lym_filename, lym_raw_data), (margin_filename, linestring, shape) in zip(load_cluster_list(tb_files),413                                                                                                                   load_lymphocyte_list(lym_files),414                                                                                                                   load_margin(margin_files)):415        tb_meta_value = 0416        cd3_meta_value, cd8_meta_value, cd3cd8_meta_value = 0, 0, 0417        try:418            tb_meta_value = tb_meta_values[tb_meta_codes.index(code_extract_func(tb_filename))]419        except:420            raise Exception("Meta-data does not have a value for file " + tb_filename + "\nwith obj_type TB")421        try:422            cd3_meta_value = cd3_meta_values[cd3_meta_codes.index(code_extract_func(lym_filename))]423            cd8_meta_value = cd8_meta_values[cd8_meta_codes.index(code_extract_func(lym_filename))]424            cd3cd8_meta_value = cd3cd8_meta_values[cd3cd8_meta_codes.index(code_extract_func(lym_filename))]425        except:426            raise Exception("Meta-data does not have a value for file " + lym_filename)427        all_tb_data = convert_cluster_to_tb(cluster_raw_data, tb_meta_value)428        all_lym_data = convert_lymphocyte_to_cd3cd8(lym_raw_data, cd3cd8_meta_value)429        assert type(all_tb_data) == np.ndarray and type(all_lym_data) == np.ndarray430        #####################431        ### CD3&CD8 BLOCK ###432        #####################433        combined_tb_lym_data = merge_tb_lym(all_tb_data, all_lym_data)434        ## ALL - TB AND CD3/CD8435        title = "All TB and CD3/CD8"436        print(code_extract_func(tb_filename) + ": " + title)437        visualisation_interaction_block(combined_tb_lym_data, linestring, shape, title, image_filename_func(target_directory_stub + "ALL_TB_CD3CD8_IMAGES/", tb_filename))438        statistic_generation_block(heatmap_transform, statistics, combined_tb_lym_data, tb_meta_value + cd3cd8_meta_value, 50, heatmap_filename_func, target_directory_stub + "ALL_", tb_filename)439        statistic_generation_block(heatmap_transform, statistics, combined_tb_lym_data, tb_meta_value + cd3cd8_meta_value, 100, heatmap_filename_func, target_directory_stub + "ALL_", tb_filename)440        start_time = timeit.default_timer()441        tb_lym_margin_data, tb_lym_core_data = get_margin_core_cells(combined_tb_lym_data, shape, linestring, 500)442        elapsed = timeit.default_timer() - start_time443        print("get_margin_core_cells() took:", elapsed)444        del(combined_tb_lym_data)445        assert tb_lym_margin_data.shape[0] > 0 and tb_lym_margin_data.shape[1] >= 4, "Something wrong with TB-CD3CD8 margin data"446        assert tb_lym_core_data.shape[0] > 0 and tb_lym_core_data.shape[1] >= 4, "Something wrong with TB-CD3CD8 core data"447        ## MARGIN - TB AND CD3/CD8448        title = "TB and CD3/CD8 (IM)"449        print(code_extract_func(tb_filename) + ": " + title)450        visualisation_interaction_block(tb_lym_margin_data, linestring, shape, title, image_filename_func(target_directory_stub + "MARGIN_TB_CD3CD8_IMAGES/", tb_filename))451        statistic_generation_block(heatmap_transform, statistics, tb_lym_margin_data, len(tb_lym_margin_data), 50, heatmap_filename_func, target_directory_stub + "MARGIN_", tb_filename)452        statistic_generation_block(heatmap_transform, statistics, tb_lym_margin_data, len(tb_lym_margin_data), 100, heatmap_filename_func, target_directory_stub + "MARGIN_", tb_filename)453        ## CORE - TB AND CD3/CD8454        title = "TB and CD3/CD8 (CT)"455        print(code_extract_func(tb_filename) + ": " + title)456        visualisation_interaction_block(tb_lym_core_data, linestring, shape, title, image_filename_func(target_directory_stub + "CORE_TB_CD3CD8_IMAGES/", tb_filename))457        statistic_generation_block(heatmap_transform, statistics, tb_lym_core_data, len(tb_lym_core_data), 50, heatmap_filename_func, target_directory_stub + "CORE_", tb_filename)458        statistic_generation_block(heatmap_transform, statistics, tb_lym_core_data, len(tb_lym_core_data), 100, heatmap_filename_func, target_directory_stub + "CORE_", tb_filename)459        ## IMCT - TB AND CD3/CD8460        title = "TB and CD3/CD8 (IMCT)"461        print(code_extract_func(tb_filename) + ": " + title)462        tb_lym_imct_data = np.concatenate((tb_lym_margin_data, tb_lym_core_data), axis=0)463        assert tb_lym_imct_data.shape[0] == (tb_lym_margin_data.shape[0] + tb_lym_core_data.shape[0]), "Margin and core cells not added properly"464        visualisation_interaction_block(tb_lym_imct_data, linestring, shape, title, image_filename_func(target_directory_stub + "IMCT_TB_CD3CD8_IMAGES/", tb_filename))465        statistic_generation_block(heatmap_transform, statistics, tb_lym_imct_data, len(tb_lym_imct_data), 50, heatmap_filename_func, target_directory_stub + "IMCT_", tb_filename)466        statistic_generation_block(heatmap_transform, statistics, tb_lym_imct_data, len(tb_lym_imct_data), 100, heatmap_filename_func, target_directory_stub + "IMCT_", tb_filename)467        num_files += 1468    assert(num_files == expected_num_files)469# def get_images_with_interactions(tb_files, tb_convert, lym_files, lym_convert, tb_lym_interaction_funcs, expected_num_files, meta_data_files, target_directory_stub, visualise_funcs, title_tree, output_filename_func, margin_files, d):470    """471    TB and do CD3, CD8, CD3&CD8 all at once.472    Four(!) levels of looping:473    for all files:474        meta vals...475        for tb_select, lym_select (must be the same region!):476            clean_tb, clean_lym = select_tb(), select_lym()477            for all interactions between current tb and lym selection (target_directories):478                create visualisation (pointwise-scatter with margin)479                for all statistics of interactions between tb and lym:480                    ...481        num_files++482    Titles, target directories either need stubs or need to be hierarichial. Visualisation funcs are fine.483    """484    tb_meta_pairs = load_meta("TB", meta_data_files, code_extract_func)485    tb_meta_codes = [code for (code, val) in tb_meta_pairs]486    tb_meta_values = [val for (code, val) in tb_meta_pairs]487    cd3_meta_pairs = load_meta("CD3", meta_data_files, code_extract_func)488    cd8_meta_pairs = load_meta("CD8", meta_data_files, code_extract_func)489    cd3cd8_meta_pairs = load_meta("CD3&CD8", meta_data_files, code_extract_func)490    cd3_meta_codes = [code for (code, val) in cd3_meta_pairs]491    cd8_meta_codes = [code for (code, val) in cd8_meta_pairs]492    cd3cd8_meta_codes = [code for (code, val) in cd3cd8_meta_pairs]493    assert(cd3_meta_codes == cd8_meta_codes and cd8_meta_codes == cd3cd8_meta_codes)494    cd3_meta_values = [val for (code, val) in cd3_meta_pairs]495    cd8_meta_values = [val for (code, val) in cd8_meta_pairs]496    cd3cd8_meta_values = [val for (code, val) in cd3cd8_meta_pairs]497    num_files = 0498    for (tb_file, tb_raw_data), (lym_file, lym_raw_data) in zip(load_cluster_list(tb_files), load_lymphocyte_list(lym_files)):499        tb_meta_value, lym_meta_value = 0, 0500        try:501            tb_meta_value = tb_meta_values[tb_meta_codes.index(code_extract_func(tb_file))]502            cd3_meta_value = cd3_meta_values[cd3_meta_codes.index(code_extract_func(lym_file))]503            cd8_meta_value = cd8_meta_values[cd8_meta_codes.index(code_extract_func(lym_file))]504            cd3cd8_meta_value = cd3cd8_meta_values[cd3cd8_meta_codes.index(code_extract_func(lym_file))]505        except:506            raise Exception("Meta-data does not have a value for file-pair (" + tb_file + ", " + lym_file + ")")507        tb_data = tb_convert(tb_raw_data, tb_meta_value)508        lym_data = lym_convert(tb_raw_data, lym_meta_value)509        for selection in region_selections:510            selected_tb = selection(tb_data, linestring, shape, d)511            selected_lym = selection(lym_data, linestring, shape, d)512            for interaction_func in tb_lym_interaction_funcs:513                tb_lym_data = interaction_func(selected_tb, selected_lym)514                for feature in feature_funcs:515                    feature_map = feature(tb_lym_data)516                    tb_data = feature_map["TB"]517                    cd3_data = feature_map["CD3"]518                    cd8_data = feature_map["CD8"]519                    cd3cd8_data = feature_map["CD3&CD8"]520            # for beta_transform, title_list, target_directory_list in zip(beta_transforms, title_tree, target_directory_tree):521            #     clean_beta_data = beta_transform(beta_raw_data, beta_meta_value)522            #     for alpha_beta_transform, visualise_func, title, target_directory in zip(alpha_beta_transforms, visualise_funcs, title_list, target_directory_list):523            #         clean_data = alpha_beta_transform(clean_alpha_data, clean_beta_data)524            #         fig, ax = plt.subplots(1, 1, figsize=(6, 6))525            #         ax = visualise_func(clean_data, ax, title=title)526            #         output_filename = output_filename_func(target_directory, alpha_file)  # doesn't matter whether alpha-file or beta-file for the name527            #         fig.savefig(output_filename, dpi=100, bbox_inches="tight")528        num_files += 1529    assert(num_files == expected_num_files)530def convert_cluster_to_tb(cluster_data, expected_size):531    data = np.array([[int(row[2]), int(row[1]), int(row[4]), int(row[3])] for row in cluster_data if int(row[0]) > 0 and int(row[0]) < 5])532    assert(len(data) == expected_size or expected_size == -1)  # Allow a by-pass in the case of results we can't tell ahead of time.533    return data534def convert_lymphocyte_to_cd3(lymphocyte_data, expected_size):535    data = np.array([[int(row[0]), int(row[1]), int(row[2]), int(row[3]), int(row[4]), int(row[5])] for row in lymphocyte_data if int(row[4]) > 0])536    assert(len(data) == expected_size or expected_size == -1)537    return data538def convert_lymphocyte_to_cd8(lymphocyte_data, expected_size):539    data = np.array([[int(row[0]), int(row[1]), int(row[2]), int(row[3]), int(row[4]), int(row[5])] for row in lymphocyte_data if int(row[5]) > 0])540    assert(len(data) == expected_size or expected_size == -1)541    return data542def convert_lymphocyte_to_cd3cd8(lymphocyte_data, expected_size):543    data = np.array([[int(row[0]), int(row[1]), int(row[2]), int(row[3]), int(row[4]), int(row[5])] for row in lymphocyte_data if int(row[4]) > 0 or int(row[5]) > 0])544    assert(len(data) == expected_size or expected_size == -1)545    return data546def density(heatmap):547    """548    Assumes a non-interacting heatmap of lists.549    """550    densities = np.vectorize(len)(heatmap)551    return densities552def density_values(heatmap):553    return heatmap  # it already is the answer554def ratio_values(heatmap):555    w, h = heatmap.shape[0], heatmap.shape[1]556    ratios = np.zeros(heatmap.shape)557    for i in range(w):558        for j in range(h):559            tb_val = heatmap[i][j][0]560            cd3_val = heatmap[i][j][1]561            cd8_val = heatmap[i][j][2]562            cd3cd8_val = heatmap[i][j][3]563            ratios[i][j][0] = 0  # don't care about tb, only the other 3564            ratios[i][j][1] = cd3_val / tb_val if tb_val != 0 else -1565            ratios[i][j][2] = cd8_val / tb_val if tb_val != 0 else -1566            ratios[i][j][3] = cd3cd8_val / tb_val if tb_val != 0 else -1567    return ratios568# def get_num_hotspots(heatmap, p_value_threshold=0.05, Z_score_threshold=1.96):569#     hotspots = np.zeros((heatmap.shape[0], heatmap.shape[1], 2))  # layer 0 corresponds to coldspots, layer 1 to hotspots570#     for i in range(array_of_heatmaps.shape[0]):571#         all_count = 0572#         for j in range(array_of_heatmaps.shape[1]):573#             num_hot, num_cold = 0, 0574#             (n1, n2) = heatmap.shape575#             points = [[(x, y) for y in range(n2)] for x in range(n1)]576#             dist = math.sqrt(2)577#             with sb.axes_style("white"):578#                 msk = heatmap != 0579#                 points = [points[r][c] for r in range(n1) for c in range(n2) if msk[r][c]]580#                 w = DistanceBand(points, threshold=dist)581#                 lg_star = G_Local(heatmap[msk], w, transform='B', star=True)582#                 Z_scores = np.zeros(heatmap.shape, dtype=np.float32)583#                 p_values = np.zeros(heatmap.shape, dtype=np.float32)584#                 ind = 0585#                 for r in range(n1):586#                     for c in range(n2):587#                         if msk[r][c]:588#                             Z_scores[r, c] = lg_star.Zs[ind]589#                             p_values[r, c] = lg_star.p_sim[ind]590#                             ind += 1591#                         else:592#                             Z_scores[r][c] = -100.0  # why -100?593#                 for ind in range(lg_star.Zs.shape[0]):594#                     if lg_star.p_sim[ind] < p_value_threshold:595#                         if lg_star.Zs[ind] > Z_score_threshold:596#                             num_hot += 1597#                         elif lg_star.Zs[ind] < -(Z_score_threshold):598#                             num_cold += 1599#                 array_of_hotspots[i, all_count] = num_cold600#                 array_of_hotspots[i, all_count+1] = num_hot601#     return heatmap602def get_tb_images(cluster_files, expected_num_files, meta_data_files, target_directory_stub, margin_files, d):603    # All, IM, CT, IM&CT604    obj_type = "TB"605    load_obj_func = load_cluster_list606    convert_func = lambda data, exp_size: convert_cluster_to_tb(data, exp_size)607    visualise_func = lambda data, ax, title: Visualiser.visualise_clusters_on_ax(data, ax, size_included=False, title=title)608    partition_func = lambda entire_list, exp_size: partition.partition(entire_list, tile_size=886, to_list=True, input_type="clean", by_metric=True, scale=1)[0]609    image_filename_func = lambda target_directory, filename: target_directory + code_extract_func(filename) + ".png"610    heatmap_filename_func = lambda target_directory, filename, statistic: target_directory + code_extract_func(filename) + statistic + ".p"611    args = (obj_type, cluster_files, expected_num_files, meta_data_files, load_obj_func,612            target_directory_stub, convert_func, visualise_func, image_filename_func, partition_func,613            heatmap_filename_func, margin_files, d)614    get_images_with_margins(*args)615def get_lym_images(lym_type, lymphocyte_files, expected_num_files, meta_data_files, target_directory_stub, margin_files, d):616    # All, IM, CT, IM&CT, for CD3, CD8, CD3&CD8617    load_obj_func = load_lymphocyte_list618    if lym_type == "CD3":619        convert_func = lambda data, exp_size: convert_lymphocyte_to_cd3(data, exp_size)620    elif lym_type == "CD8":621        convert_func = lambda data, exp_size: convert_lymphocyte_to_cd8(data, exp_size)622    else:  # CD3&CD8623        convert_func = lambda data, exp_size: convert_lymphocyte_to_cd3cd8(data, exp_size)624    if lym_type == "CD3&CD8":625        visualise_func = lambda data, ax, title: Visualiser.visualise_lymphocytes_on_ax(data, ax, title=title)626    else:627        visualise_func = lambda data, ax, title: Visualiser.visualise_one_cd_on_ax(data, ax, "CD3", title=title, size=0.1, colour='r')628    partition_func = lambda entire_list, exp_size: partition.partition(entire_list, tile_size=886, to_list=True, input_type="clean", by_metric=True, scale=1)[0]629    image_filename_func = lambda target_directory, filename: target_directory + code_extract_func(filename) + ".png"630    heatmap_filename_func = lambda target_directory, filename, statistic: target_directory + code_extract_func(filename) + statistic + ".p"631    args = (lym_type, lymphocyte_files, expected_num_files, meta_data_files, load_obj_func,632            target_directory_stub, convert_func, visualise_func, image_filename_func,633            partition_func, heatmap_filename_func, margin_files, d)634    get_images_with_margins(*args)635def partition_and_transform_tb_interactions(items, meta_data_value, d):636    """637    Mixed partition, produce heatmap of interaction given some function of interaction.638    """639    init_num_objs = len(items)640    partitioned_items, tiles, \641        lymph_w, lymph_h,     \642        clust_w, clust_h,     \643        x_step, y_step = partition.partition(items, tile_size=886, to_list=True, input_type="mixed",644                                             by_metric=True, scale=1)645    num_objs = 0646    for i in range(partitioned_items.shape[0]):647        for j in range(partitioned_items.shape[1]):648            (lymphocytes, cancer_clusters) = partitioned_items[i][j]649            num_objs += (len(lymphocytes) + len(cancer_clusters))650    assert num_objs == init_num_objs and init_num_objs == meta_data_value, "num_objs: {}, init_num_objs: {}, and meta_data_value: {}".format(num_objs, init_num_objs, meta_data_value)651    heatmap = Cluster.get_interacting_object_counts(partitioned_items, d, partitioned_items.shape, tiles,652                                                    lymph_w, lymph_h, clust_w, clust_h, x_step, y_step)653    assert heatmap.shape[0] == partitioned_items.shape[0] and heatmap.shape[1] == partitioned_items.shape[1]654    heatmap = np.flip(np.rot90(heatmap), 0)655    return heatmap656def merge_tb_lym(tb_data, lym_data):657    """658    Assumes tb_data to be np array of form:659    +------+------+------+------+660    | xMin | xMax | yMin | yMax |661    +------+------+------+------+662    | .... | .... | .... | .... |663    +------+------+------+------+664    lym data to be np array of form:665    +------+------+------+------+-----+-----+666    | xMin | xMax | yMin | yMax | CD3 | CD8 |  <-- Note that CD3 and CD8 columns are (effectively) binary classifications.667    +------+------+------+------+-----+-----+668    | .... | .... | .... | .... | ... | ... |669    +------+------+------+------+-----+-----+670    With the output to be of form:671    +------+------+------+------+-----+-----+----+672    | xMin | xMax | yMin | yMax | CD3 | CD8 | TB |  <-- As with CD3/CD8 above, TB column is for binary classification.673    +------+------+------+------+-----+-----+----+674    | .... | .... | .... | .... | ... | ... | .. |675    +------+------+------+------+-----+-----+----+676    """677    tb_data = np.concatenate((tb_data, np.zeros((tb_data.shape[0], 2)), np.ones((tb_data.shape[0], 1))), axis=1)678    lym_data = np.concatenate((lym_data, np.zeros((lym_data.shape[0], 1))), axis=1)679    mixed_data = np.concatenate((lym_data, tb_data))680    assert len(mixed_data) == len(tb_data) + len(lym_data)681    return mixed_data682# Functions for selections of data683def select_all(data, linestring, shape, d):684    return data685def select_CT(data, linestring, shape, d):686    return np.asarray(get_core_cells(data, shape, linestring, d))687def select_IM(data, linestring, shape, d):688    return np.asarray(get_margin_cells(data, linestring, d))689def select_IMCT(data, linestring, shape, d):690    margin_cells, core_cells = get_margin_core_cells(data, shape, linestring, d)691    margin_cells.extend(core_cells)692    return np.asarray(margin_cells)693def get_tb_cd3_cd8_cd3cd8_images(root_dir, meta_data_dir, target_dir_root, expected_num_files):694    """695    All input dirs should have a terminating slash696    This function assumes that select(data, linestring, shape, d) => return convert(data), i.e. ALL-DATA, no interaction with margin697    """698    # TODO: confirm that d = 500um is the correct distance from margin to allow699    d = 500700    tb_target_directory_stub = target_dir_root+"IMAGES/"701    cd3_target_directory_stub = tb_target_directory_stub702    cd8_target_directory_stub = tb_target_directory_stub703    cd3cd8_target_directory_stub = tb_target_directory_stub704    tb_options = (root_dir+"TB_pickle_files/*", expected_num_files, meta_data_dir+"*", tb_target_directory_stub, root_dir+"margin_files/*", d)705    cd3_options = ("CD3", root_dir+"lymphocyte_csv_files/*", expected_num_files, meta_data_dir+"*", cd3_target_directory_stub, root_dir+"margin_files/*", d)706    cd8_options = ("CD8", root_dir+"lymphocyte_csv_files/*", expected_num_files, meta_data_dir+"*", cd8_target_directory_stub, root_dir+"margin_files/*", d)707    cd3cd8_options = ("CD3&CD8", root_dir+"lymphocyte_csv_files/*", expected_num_files, meta_data_dir+"*", cd3cd8_target_directory_stub, root_dir+"margin_files/*", d)708    get_tb_images(*tb_options)  # cluster_files, expected_num_files, meta_data_files, target_directories, margin_files, d709    get_lym_images(*cd3_options)  # (lym_type, lymphocyte_files, expected_num_files, meta_data_files, target_directories, margin_files, d)710    get_lym_images(*cd8_options)711    get_lym_images(*cd3cd8_options)712def get_tb_cd3_cd8_cd3cd8_interaction_images(root_dir, target_dir_root, expected_num_files):713    """714    All input dirs should have a terminating slash715    This function assumes that select(data, linestring, shape, d) returns:716     > return convert(data)  // ALL-DATA, but display with margin717     > return get_core(convert(data), linestring, shape, d)  // CORE TUMOUR (CT)718     > return get_margin(convert(data), linestring, shape, d)  // INVASIVE MARGIN (IM)719     > return get_margin_and_core(convert(data), linestring, shape, d)  // IM & CT720    """721    # d is the maximum distance between nearby lymphocytes and any TB722    d = 50723    target_directory_stub = target_dir_root+"IMAGES/"724    tb_files = root_dir+"TB_pickle_files/*"725    lym_files = root_dir+"lymphocyte_csv_files/*"726    margin_files = root_dir + "margin_files/*"727    meta_data_files = root_dir+"meta_data/*"728    partition_func = partition_and_transform_tb_interactions729    statistics = [("_density", density_values, lambda heatmap: heatmap == 0), ("_ratio", ratio_values, lambda heatmap: heatmap == -1)]730    image_filename_func = lambda target_directory, filename: target_directory + code_extract_func(filename) + ".png"731    heatmap_filename_func = lambda target_directory, filename, statistic: target_directory + code_extract_func(filename) + statistic + ".p"732    args = (tb_files, lym_files, expected_num_files, meta_data_files, target_directory_stub, partition_func, statistics, image_filename_func, heatmap_filename_func, margin_files, d)733    get_images_with_interactions_condensed(*args)734        # ax = fig.gca(projection='3d')735        # Add margin736        # ax = plot_line_and_shape(ax, linestring, shape)737        # X_3D = np.arange(0, heatmap_tb.shape[1], 1)738        # Y_3D = np.arange(0, heatmap_tb.shape[0], 1)739        # X_3D, Y_3D = np.meshgrid(X_3D, Y_3D)740        # Z = heatmap_tb741        # # print(X_3D.shape, Y_3D.shape, Z.shape)742        # # Plot the surface.743        # surf = ax.plot_surface(X_3D, Y_3D, Z, cmap=cm.hot, linewidth=0, antialiased=False, alpha=1)744        # # # Add a color bar which maps values to colors.745        # fig.colorbar(surf, shrink=0.5, aspect=5)746        # plt.show()...stats.py
Source:stats.py  
1'''2Created on Feb 14, 20173@author: nicolas4'''5from __future__ import print_function6from functools import reduce7from lemoncheesecake.helpers.console import print_table, bold8from lemoncheesecake.cli.command import Command9from lemoncheesecake.cli.utils import load_suites_from_project10from lemoncheesecake.filter import add_test_filter_cli_args, make_test_filter11from lemoncheesecake.testtree import flatten_suites12from lemoncheesecake.project import load_project13class Stats:14    def __init__(self):15        self.tests_nb = 016        self.disabled_tests_nb = 017        self.suites_nb = 018        self.non_empty_suites_nb = 019        self.tags = {}20        self.properties = {}21        self.links = {}22def compute_stats(suites):23    stats = Stats()24    for suite in flatten_suites(suites):25        stats.suites_nb += 126        for test in suite.get_tests():27            stats.tests_nb += 128            if test.is_disabled():29                stats.disabled_tests_nb += 130            for tag in test.hierarchy_tags:31                stats.tags[tag] = stats.tags.get(tag, 0) + 132            for prop, value in test.hierarchy_properties.items():33                if prop not in stats.properties:34                    stats.properties[prop] = {}35                if value not in stats.properties[prop]:36                    stats.properties[prop][value] = 037                stats.properties[prop][value] += 138            for link in test.hierarchy_links:39                stats.links[link] = stats.links.get(link, 0) + 140        else:41            stats.non_empty_suites_nb += 142    return stats43class StatsCommand(Command):44    def get_name(self):45        return "stats"46    def get_description(self):47        return "Display statistics about the project's tests"48    def add_cli_args(self, cli_parser):49        add_test_filter_cli_args(cli_parser)50    def run_cmd(self, cli_args):51        project = load_project()52        suites = load_suites_from_project(project, make_test_filter(cli_args))53        stats = compute_stats(suites)54        def percent_of_tests(val):55            return "%2d%%" % (float(val) / stats.tests_nb * 100)56        # Show tags57        lines = []58        for tag in sorted(stats.tags.keys(), key=lambda k: stats.tags[k], reverse=True):59            lines.append([bold(tag), stats.tags[tag], percent_of_tests(stats.tags[tag])])60        print_table(bold("Tags"), ["Tag", "Tests", "In %"], lines)61        # Show properties62        lines = []63        prop_names = sorted(64            stats.properties.keys(),65            key=lambda k: reduce(lambda x, y: x + y, stats.properties[k].values(), 0),66            reverse=True67        )68        for prop_name in prop_names:69            prop_values = sorted(70                stats.properties[prop_name].keys(),71                key=lambda k: stats.properties[prop_name][k],72                reverse=True73            )74            for prop_value in prop_values:75                lines.append([76                    bold(prop_name), bold(prop_value),77                    stats.properties[prop_name][prop_value],78                    percent_of_tests(stats.properties[prop_name][prop_value])79                ])80        print_table(bold("Properties"), ["Property", "Value", "Tests", "In %"], lines)81        # Show links82        lines = []83        for link in sorted(stats.links.keys(), key=lambda k: stats.links[k], reverse=True):84            lines.append([85                bold(link[1] or "-"), link[0], stats.links[link], percent_of_tests(stats.links[link])86            ])87        print_table(bold("Links"), ["Name", "URL", "Tests", "In %"], lines)88        tests_info = bold("%d tests" % stats.tests_nb)89        if stats.disabled_tests_nb > 0:90            tests_info += " (among which %s disabled tests)" % stats.disabled_tests_nb91        suites_info = bold("%d suites" % stats.non_empty_suites_nb)92        if stats.suites_nb > stats.non_empty_suites_nb:93            suites_info += " (+ %d empty suites)" % (stats.suites_nb - stats.non_empty_suites_nb)94        print("Total: %s in %s" % (tests_info, suites_info))95        print()...margin.py
Source:margin.py  
1import re2import matplotlib.pyplot as plt3from shapely.geometry import Point, Polygon, MultiPolygon4from descartes import PolygonPatch5# 2016_12_17__0690.annotations6# name = "2016_12_17__0765(Ext).annotations"7name = "2016_12_17__0690.annotations"8hierarchy = open("./Daniel line/" + name, "r").read()9p = re.compile("<*")10hierarchy_tags = p.split(hierarchy)11print(len(hierarchy_tags))12x_tags = []13y_tags = []14for tag in hierarchy_tags:15    if "X=" in tag and "Y=" in tag:16        x = tag.split("\"")[1]17        y = tag.split("\"")[3]18        x_tags.append(int(x))19        y_tags.append(int(y))20tags = []21# We have 4 points22# For each of 4 points:23# - is within core tumour? (core_tumour.contains(point)) // inner line24# - if no, then check: tumour_front.contains(point) or invasive_front.contains(point) // second or third lines25#26for i, j in zip(x_tags, y_tags):27    tags.append((i, j))28fig = plt.figure(1)29# 130ax = fig.add_subplot(111)31poly = Polygon(tags)32def plot_line(ax, ob):33    x, y = ob.xy34def construct_shape(poly, distance, delta, update):35    difference = 036    if type(poly.buffer(distance)) is MultiPolygon:37        difference = update(difference, delta)38        while type(poly.buffer(distance + difference)) is MultiPolygon:39            difference = update(difference, delta)40        new_poly = poly.buffer(distance + difference)41        return new_poly.buffer(-difference)42    else:43        return poly.buffer(distance)44def construct_shape_add_on_new_shape(poly, distance, delta, update):45    padding = update(0, delta)46    difference = 047    if type(poly.buffer(distance)) is MultiPolygon:48        print("Boo!")49        shape = poly50        difference = update(difference, delta)51        while type(shape.buffer(distance + padding)) is MultiPolygon:52            shape = shape.buffer(distance + padding)53            difference = update(difference, delta)54        return shape.buffer(-difference)55    else:56        return poly.buffer(distance)57# Iterative increasing approach58dilated = construct_shape(poly, 500, 1, lambda x, y: x + y)59contracted = construct_shape(dilated, -1000, 1, lambda x, y: x - y)60print(type(contracted))61print(MultiPolygon)62if type(contracted) is MultiPolygon:63    print("Uh oh. Contracted for " + name + " invalid.")64    int_x = [i[0] for po in contracted for i in po.exterior.coords]65    int_y = [i[1] for po in contracted for i in po.exterior.coords]66else:67    int_x = [i[0] for i in contracted.exterior.coords]68    int_y = [i[1] for i in contracted.exterior.coords]69if type(dilated) is MultiPolygon:70    print("Uh oh. Dilated for " + name + " invalid.")71    ext_x = [i[0] for po in dilated for i in po.exterior.coords]72    ext_y = [i[1] for po in dilated for i in po.exterior.coords]73else:74    ext_x = [i[0] for i in dilated.exterior.coords]75    ext_y = [i[1] for i in dilated.exterior.coords]76plt.plot(ext_x, ext_y, color="r")77plt.plot(int_x, int_y, color="b")78patch = PolygonPatch(poly, fc="red", alpha=0.5, zorder=2)79ax.add_patch(patch)80# plt.plot(x_tags, y_tags)81plt.show()...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation test minutes FREE!!
