How to use the distinct_labels method in hypothesis

Best Python code snippets using hypothesis

latent_plot_utils.py

Source: latent_plot_utils.py (GitHub)


import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from matplotlib import cm
from matplotlib.legend_handler import HandlerLine2D, HandlerTuple
import tikzplotlib
from os.path import join as pjoin

BASEPATH = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, BASEPATH)
sys.path.insert(0, pjoin(BASEPATH, '..'))
from py_utils import ensure_dirs


def distinct_labels_and_indices(labels):
    distinct_labels = list(set(labels))
    distinct_labels.sort()
    num_labels = len(distinct_labels)
    indices_i = {label: [] for label in distinct_labels}
    for i, label in enumerate(labels):
        indices_i[label].append(i)
    indices_i = {label: np.array(indices) for label, indices in indices_i.items()}
    return num_labels, distinct_labels, indices_i


def plot2D(data, labels, title):
    x_min, x_max = np.min(data, axis=0), np.max(data, axis=0)
    data = (data - x_min) / (x_max - x_min)
    fig, ax = plt.subplots(figsize=(8, 8))
    cjet = cm.get_cmap("jet")
    num_labels, distinct_labels, indices = distinct_labels_and_indices(labels)
    for i, label in enumerate(distinct_labels):
        index = indices[label]
        ax.scatter(data[index, 0], data[index, 1], label=label,
                   c=[cjet(1.0 * i / num_labels)], linewidths=0.)
    ax.legend(loc="center left", bbox_to_anchor=(1, 0, 1, 1),
              title=title.split('/')[-1])
    fig.tight_layout()
    tikzplotlib.save("%s.tex" % title, figure=fig, strict=True)
    plt.savefig("%s.png" % title)
    return fig


def plot2D_overlay(data_list, labels_list, alpha_list, title):
    x_min, x_max = np.array((1e9, 1e9)), np.array((-1e9, -1e9))
    for data in data_list:
        x_min = np.minimum(x_min, np.min(data, axis=0))
        x_max = np.maximum(x_max, np.max(data, axis=0))
    for i in range(len(data_list)):
        data_list[i] = (data_list[i] - x_min) / (x_max - x_min)
    fig, ax = plt.subplots(figsize=(8, 8))
    cjet = cm.get_cmap("jet")
    indices_list = []
    distinct_labels = []
    for labels in labels_list:
        _, cur_labels, indices = distinct_labels_and_indices(labels)
        indices_list.append(indices)
        for label in cur_labels:
            if label not in distinct_labels:
                distinct_labels.append(label)
    num_labels = len(distinct_labels)
    for i, label in enumerate(distinct_labels):
        res = 0.0
        for data, labels, indices, alpha in zip(data_list, labels_list, indices_list, alpha_list):
            if label in indices.keys():
                index = indices[label]
            else:
                index = np.array([], dtype=int)  # empty index must be integer-typed for fancy indexing
            c = cjet((1.0 * i + res) / (num_labels + 1))
            ax.scatter(data[index, 0], data[index, 1], label=label, c=[c], alpha=alpha, linewidths=0.)
            res += 0.3
    handles, labels = ax.get_legend_handles_labels()
    paired_handles = []
    handles_tot = len(handles) // 2
    for i in range(handles_tot):
        paired_handles.append((handles[i * 2], handles[i * 2 + 1]))
    ax.legend(handles=paired_handles, labels=distinct_labels, numpoints=1,
              handler_map={tuple: HandlerTuple(ndivide=None)},
              loc="center left", bbox_to_anchor=(1, 0, 1, 1),
              title=title.split('/')[-1])
    fig.tight_layout()
    tikzplotlib.save("%s.tex" % title, figure=fig, strict=True)
    plt.savefig("%s.png" % title)
    return fig


def plot2D_phase(data, labels, title):
    x_min, x_max = np.min(data, axis=0), np.max(data, axis=0)
    data = (data - x_min) / (x_max - x_min)
    figsize = (8, 8)
    add_width = 2
    new_width = figsize[0] + add_width
    fig = plt.figure(figsize=(new_width, figsize[1]))
    fac_l, fac_r = figsize[0] / new_width, add_width / new_width
    rect_l = [0.1, 0.1, 0.8, 0.8]
    rect_r = [0., 0.1, 0.2, 0.8]
    ax = fig.add_axes(np.array(rect_l) * np.array([fac_l, 1, fac_l, 1]))
    cax = fig.add_axes(np.array(rect_r) * np.array([fac_r, 1, fac_r, 1]) + np.array([fac_l, 0, 0, 0]))
    sin_labels = list(map(lambda l: np.sin(float(l)), labels))
    bla = ax.scatter(data[:, 0], data[:, 1], c=sin_labels, cmap="jet", alpha=1.0)
    # plt.colorbar(bla, cax=cax) <- some problem with the color bar..
    # fig.tight_layout()
    tikzplotlib.save("%s.tex" % title, figure=fig, strict=True)
    plt.savefig("%s.png" % title)
    return fig


tsne = None
def calc_tsne(raw):
    global tsne
    if tsne is None:
        tsne = TSNE(n_components=2, init='pca', random_state=7)  # n_iter = xxx
    result = tsne.fit_transform(raw)
    return result


pca = None
def calc_pca(raw):
    global pca
    if pca is None:
        pca = PCA(n_components=2)
    return pca.fit_transform(raw)


def calc_pca_curve(raw):
    pcan = PCA()
    pcan.fit_transform(raw)
    pct = pcan.explained_variance_ratio_
    prefix = np.cumsum(pct / np.sum(pct))
    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_axes([0.2, 0.2, 0.6, 0.6])
    ax.plot(list(range(1, 6)), prefix[:5])
    ax.plot(2, prefix[1], "ro")
    ax.annotate("{:.3f}% of variation".format(prefix[1] * 100),
                (2, prefix[1]),
                textcoords="offset points",
                xytext=(60, -20),
                ha="center")
    ax.set_xticks(list(range(1, 6)))
    ax.set_yticks(list(np.arange(0.5, 1.01, 0.1)))
    ax.set_xlabel("number of components")
    ax.set_ylabel("explained variance ratio")
    name = "pca_curve"
    tikzplotlib.save(name + ".tex", figure=fig, strict=True)
    plt.savefig("pca_curve.png")
    return pct


def plot_tsne(raw, labels, title):
    result = calc_tsne(raw)
    return plot2D(result, labels, title)


def plot_content_tsne(raw, slabels, clabels, title):
    name = title + "_tsne"
    path = name + ".npz"
    if os.path.exists(path):
        print("%s already exists" % path)
        result = np.load(path, allow_pickle=True)["result"]
    else:
        print("start to produce %s" % path)
        result = calc_tsne(raw)
        np.savez_compressed(name, result=result)
    plot2D(result, slabels, title + "_style_labels")
    plot2D(result, clabels, title + "_content_labels")


def calc_many_blas(raws, calc_single):
    lens = list(map(lambda x: len(x), raws))
    whole = np.concatenate(raws, axis=0)
    proj = calc_single(whole)
    ret = ()
    suml = 0
    for l in lens:
        ret += (proj[suml: suml + l],)
        suml += l
    return ret


def get_all_plots(data, output_path, writers, iter, summary=True,
                  style_cluster_protocols=('pca',),  # one-element tuple, not a bare string
                  separate_compute=False):
    """
    data: {"train": dict_train, "test": dict_test}
    dict_train: {"style2d_code": blabla, etc.}
    separate_compute: compute t-SNE for 2D & 3D separately
    """
    ensure_dirs(output_path)

    def fig_title(title):
        return pjoin(output_path, title)

    def add_fig(fig, title, phase):
        if summary:
            writers[phase].add_figure(title, fig, global_step=iter)

    keys = data["train"].keys()
    has2d = "style2d_code" in keys
    has3d = "style3d_code" in keys
    # style codes & adain params
    for suffix in ["_code", "_adain"]:
        codes_raw = []
        titles = []
        phases = []
        data_keys = []
        if has2d: data_keys.append("style2d" + suffix)
        if has3d: data_keys.append("style3d" + suffix)
        for key in data_keys:
            for phase in ["train", "test"]:
                codes_raw.append(data[phase][key])
                titles.append(f'{phase}_{key}')
                phases.append(phase)
        # calc tsne with style2/3d, train/test altogether
        for name, protocol in zip(['pca', 'tsne'], [calc_pca, calc_tsne]):
            if name not in style_cluster_protocols:
                continue
            style_codes = calc_many_blas(codes_raw, protocol)
            fig = plot2D_overlay([style_codes[0], style_codes[2]],
                                 [data["train"]["meta"]["style"], data["train"]["meta"]["style"]],
                                 [1.0, 0.5],
                                 fig_title(f'joint_embedding_{name}{suffix}'))
            add_fig(fig, f'joint_embedding_{name}{suffix}', "train")
            for i, (code, phase, title) in enumerate(zip(style_codes, phases, titles)):
                if separate_compute:
                    code = protocol(codes_raw[i])
                for label_type in ["style", "content"]:
                    fig = plot2D(code, data[phase]["meta"][label_type], fig_title(f'{title}_{name}_{label_type}'))
                    add_fig(fig, f'{title}_{name}_{label_type}', phase)
    # content codes (train only)
    content_code_pca = calc_pca(data["train"]["content_code"])
    for label in ["style", "content", "phase"]:
        if label == "phase":
            indices = [i for i in range(len(data["train"]["meta"]["content"])) if data["train"]["meta"]["content"][i] == "walk"]
            walk_code = content_code_pca[np.array(indices)]
            phase_labels = [data["train"]["meta"]["phase"][i] for i in indices]
            fig = plot2D_phase(walk_code, phase_labels, fig_title(f'content_by_{label}'))
        else:
            fig = plot2D(content_code_pca, data["train"]["meta"][label], fig_title(f'content_by_{label}'))
        add_fig(fig, f'content_by_{label}', "train")
    """
    fig = show_images_from_disk("", all_titles, 2, output_path + "all_codes")
    if summary:
        writers["train"].add_figure("all codes", fig, global_step=iter)
    """


def get_demo_plots(data, output_path):
    """
    data: {"train": dict_train, "test": dict_test}
    dict_train: {"style2d_code": blabla, etc.}
    """
    ensure_dirs(output_path)

    def fig_title(title):
        return pjoin(output_path, title)

    style_labels = data["train"]["meta"]["style"]
    adain_raw = []
    for key in ["style2d_adain", "style3d_adain"]:
        for phase in ["train", "test"]:
            adain_raw.append(data[phase][key])
    adain_tsne = calc_many_blas(adain_raw, calc_tsne)
    plot2D_overlay([adain_tsne[0], adain_tsne[2]],
                   [style_labels, style_labels],
                   [1.0, 0.5],
                   fig_title(f'joint_embedding_adain_tsne'))
    for key in ["style3d_code", "style3d_adain"]:
        tsne_code = calc_tsne(data["train"][key])
        plot2D(tsne_code, style_labels, fig_title(f'{key}_tsne'))
    content_code_pca = calc_pca(data["train"]["content_code"])
    indices = [i for i in range(len(data["train"]["meta"]["content"])) if data["train"]["meta"]["content"][i] == "walk"]
    walk_code = content_code_pca[np.array(indices)]
    phase_labels = [data["train"]["meta"]["phase"][i] for i in indices]
    plot2D_phase(walk_code, phase_labels, fig_title(f'content_by_phase'))
    plot2D(content_code_pca, style_labels, fig_title(f'content_by_style'))


def show_images_from_disk(path, titles, rows, this_title):
    images = []
    for title in titles:
        name = "%s.png" % title
        input_path = os.path.join(path, name)
        images.append(plt.imread(input_path))
    this_title = os.path.join(path, this_title)
    return show_images(images, titles, this_title, rows)


def show_images(images, titles, this_title, rows=1):
    """Display a list of images in a single figure with matplotlib.

    Parameters
    ----------
    images: List of np.arrays compatible with plt.imshow.
    rows (Default = 1): Number of rows in figure (number of columns is
        set to np.ceil(n_images/float(rows))).
    titles: List of titles corresponding to each image. Must have
        the same length as images.
    """
    assert (len(images) == len(titles))
    n_images = len(images)
    cols = int(np.ceil(n_images / float(rows)))  # add_subplot expects an int
    # if titles is None: titles = ['Image (%d)' % i for i in range(1, n_images + 1)]
    size = np.array((8, 8)) * np.array((cols, rows))  # (width, height) in inches
    fig = plt.figure(figsize=size)
    for n, (image, title) in enumerate(zip(images, titles)):
        a = fig.add_subplot(rows, cols, n + 1)
        if image.ndim == 2:
            plt.gray()
        a.set_axis_off()
        plt.imshow(image)
        a.set_title(title)
    fig.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.subplots_adjust(wspace=0, hspace=0)
    # plt.show()
    plt.savefig("%s.png" % this_title, dpi=150, bbox_inches='tight', pad_inches=0)


MyKNeighborsClassifier.py

Source: MyKNeighborsClassifier.py (GitHub)


#!/usr/bin/env python
# -*- coding: utf-8 -*-
from scipy.spatial import distance


class MyKNeighborsClassifier:
    """Classifier implementing the k-nearest neighbors vote, similar to the
    sklearn library but different.
    https://goo.gl/Cmji3U
    But still the same.

    Parameters
    ----------
    n_neighbors : int, optional (default = 5)
        Number of neighbors to use by default.
    method : string, optional (default = 'classical')
        Method for voting. Possible values:
        - 'classical' : uniform weights. All points in each neighborhood
          are weighted equally.
        - 'weighted' : weight points by the inverse of their distance.
          In this case, closer neighbors of a query point will have a
          greater influence than neighbors which are further away.
        - 'validity' : weights are calculated from distance and multiplied
          by the validity of each voter.
        Note: implementing kd_tree is a bonus.
    norm : {'l1', 'l2'}, optional (default = 'l2')
        Distance norm. 'l1' is Manhattan distance, 'l2' is Euclidean distance.

    Examples
    --------
    """

    def __init__(self, n_neighbors=5, method='classical', norm='l2'):
        self.n_neighbors = n_neighbors
        self.method = method
        self.norm = norm
        self.labels = []
        self.distinct_labels = []
        self.data = []

    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),
            Training data.
        y : array-like, shape = [n_samples]
            Target values.
        """
        self.data = X
        self.labels = y
        self.distinct_labels = sorted(list(set(y)))  # get distinct classes
        if self.method == "validity":
            self.validities = []
            # calculate validities for each sample
            for sample_i, sample in enumerate(self.data):
                distances = []
                if self.norm == "l1":
                    # apply Manhattan distance
                    for i, x in enumerate(self.data):
                        if x != sample:
                            # store [distance, label] pairs in distances list
                            distances.append([distance.cityblock(x, sample), self.labels[i]])
                elif self.norm == "l2":
                    # apply Euclidean distance
                    for i, x in enumerate(self.data):
                        if x != sample:
                            # store [distance, label] pairs in distances list
                            distances.append([distance.euclidean(x, sample), self.labels[i]])
                # get n nearest neighbors, sorted wrt distance
                nearest_neighbors = sorted(distances, key=lambda x: x[0])[:self.n_neighbors]
                # store label weights wrt neighbors for each label
                label_weights = [0.0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    label_weights[label_index] += (1.0 / (neighbor[0] + 1e-15))
                sample_label = self.labels[sample_i]
                validity = label_weights[self.distinct_labels.index(sample_label)] / sum(label_weights)
                self.validities.append(validity)

    def predict(self, X):
        """Predict the class labels for the provided data

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),
            Test samples.

        Returns
        -------
        y : array of shape [n_samples]
            Class labels for each data sample.
        """
        if len(self.labels) == 0:
            raise ValueError("You should fit first!")

        y = []  # store the label for each data sample
        for x_i, x in enumerate(X):
            # find distance of x to all training data
            distances = []
            if self.norm == "l1":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.cityblock(x, data), self.labels[i], i])
            elif self.norm == "l2":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.euclidean(x, data), self.labels[i], i])
            # get n nearest neighbors, sorted wrt distance
            nearest_neighbors = sorted(distances, key=lambda x: x[0])[:self.n_neighbors]

            votes = [0] * self.n_neighbors  # store vote (label) of each nearest neighbor
            if self.method == "classical":
                for i, neighbor in enumerate(nearest_neighbors):
                    votes[i] = neighbor[1]
                y.append(max(votes, key=votes.count))
            elif self.method == "weighted":
                weights = []  # store weight of each neighbor: 1/(distance + 1e-15)
                for i, neighbor in enumerate(nearest_neighbors):
                    weights.append(1.0 / (neighbor[0] + 1e-15))
                # total weight for each label, indexed as in self.distinct_labels
                total_weights = [0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    total_weights[label_index] += weights[i]
                y.append(self.distinct_labels[total_weights.index(max(total_weights))])
            elif self.method == "validity":
                validities = []  # store weight * validity of each neighbor
                for i, neighbor in enumerate(nearest_neighbors):
                    validities.append((1.0 / (neighbor[0] + 1e-15)) * self.validities[neighbor[2]])
                # total validity*weight for each label, indexed as in self.distinct_labels
                total_validities = [0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    total_validities[label_index] += validities[i]
                y.append(self.distinct_labels[total_validities.index(max(total_validities))])
        return y

    def predict_proba(self, X, method=None):
        """Return probability estimates for the test data X.

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),
            Test samples.
        method : string, if None uses self.method.

        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class probabilities of the input samples. Classes are ordered
            by lexicographic order.
        """
        if method is None:
            method = self.method
        # probabilities of each class for each sample; build a fresh row per
        # sample so the rows do not alias one shared list
        p = [[0.0] * len(self.distinct_labels) for _ in range(len(X))]

        for x_i, x in enumerate(X):
            # find distance of x to all training data
            distances = []
            if self.norm == "l1":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.cityblock(x, data), self.labels[i], i])
            elif self.norm == "l2":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.euclidean(x, data), self.labels[i], i])
            # get n nearest neighbors, sorted wrt distance
            nearest_neighbors = sorted(distances, key=lambda x: x[0])[:self.n_neighbors]

            classes = [0] * len(self.distinct_labels)  # how many neighbors fall in each class

            if method == "classical":
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    classes[label_index] += 1
                for class_i in range(len(classes)):
                    p[x_i][class_i] = float(classes[class_i]) / self.n_neighbors
            elif method == "weighted":
                weights = []  # weight of each neighbor: 1/(distance + 1e-15)
                for i, neighbor in enumerate(nearest_neighbors):
                    weights.append(1.0 / (neighbor[0] + 1e-15))
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    classes[label_index] += weights[i]
                for class_i in range(len(classes)):
                    p[x_i][class_i] = float(classes[class_i]) / self.n_neighbors
            elif method == "validity":
                validities = []  # weight * validity of each neighbor
                for i, neighbor in enumerate(nearest_neighbors):
                    validities.append((1.0 / (neighbor[0] + 1e-15)) * self.validities[neighbor[2]])
                # total validity*weight for each label, indexed as in self.distinct_labels
                class_validities = [0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    class_validities[label_index] += validities[i]
                for valid_i in range(len(class_validities)):
                    p[x_i][valid_i] = float(class_validities[valid_i]) / self.n_neighbors
            # normalize probability list for each sample
            norm = sum(p[x_i])
            p[x_i] = [round(j / norm, 8) for j in p[x_i]]
        return p


if __name__ == '__main__':
    X = [[0], [1], [2], [3]]
    y = [0, 0, 1, 1]
    neigh = MyKNeighborsClassifier(n_neighbors=3, method="validity")
    neigh.fit(X, y)

    print(neigh.predict([[1.1]]))  # , [6.7], [5], [1.9], [0]])
    n = 0.9
    print(neigh.predict_proba([[n]], method='classical'))
    # [[0.66666667 0.33333333]]
    print(neigh.predict_proba([[n]], method='weighted'))
    # [[0.92436975 0.07563025]]
    print(neigh.predict_proba([[n]], method='validity'))
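Because predict_proba orders its columns by the sorted distinct_labels list built in fit, it helps to print that list when reading the probabilities. A small usage sketch; the data and class names below are chosen arbitrarily for illustration:

X = [[0.0], [0.5], [2.5], [3.0]]
y = ["a", "a", "b", "b"]
clf = MyKNeighborsClassifier(n_neighbors=3, method="weighted", norm="l2")
clf.fit(X, y)
print(clf.distinct_labels)          # ['a', 'b'] -- column order of predict_proba
print(clf.predict([[0.4], [2.8]]))  # ['a', 'b']
print(clf.predict_proba([[0.4]]))   # probabilities in ['a', 'b'] order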


aknn_alg.py

Source: aknn_alg.py (GitHub)


1"""2Code for the AKNN classification rule from:3An adaptive nearest neighbor rule for classification4Akshay Balsubramani, Sanjoy Dasgupta, Yoav Freund, Shay Moran5https://arxiv.org/abs/1905.127176Author: Akshay Balsubramani7"""8import numpy as np, sklearn, time9import sklearn.metrics10from sklearn.neighbors import NearestNeighbors11import pynndescent12def aknn_predict(13 ref_data, 14 labels, 15 margin=1.0, 16 query_data=None, 17 max_k=100, 18 use_nndescent=False19):20 # itime = time.time()21 self_is_data = False22 if query_data is None:23 query_data = ref_data24 self_is_data = True25 26 itime = time.time()27 nbrs_list = _calc_nbrs_exact(ref_data, k=max_k, use_nndescent=False)28 print('Neighbor indices computed. Time:\t {}'.format(time.time() - itime))29 aknn_predictions = predict_nn_rule(nbrs_list, labels)30 print('AKNN predictions made. Time:\t {}'.format(time.time() - itime))31 return aknn_predictions32 """33 nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=max_k).fit(ref_data)34 if use_nndescent:35 index = pynndescent.NNDescent(raw_data, n_neighbors=k)36 indices, distances = index.neighbor_graph37 else:38 distances, indices = nbrs.kneighbors(query_data)39 if self_is_data:40 indices = indices[:, 1:]41 distinct_labels = np.unique(labels)42 rngarr = np.arange(indices.shape[1])+143 query_nbrs = labels[indices]44 fracs_labels = [np.cumsum(query_nbrs == i, axis=1)/rngarr for i in distinct_labels]45 46 thresholds = margin/np.sqrt(np.arange(indices.shape[1]) + 1)47 numlabels_predicted = np.add.reduce([f > (thresholds + 1.0/len(distinct_labels)) for f in fracs_labels])48 adaptive_k = np.argmax(numlabels_predicted > 0, axis=1)49 pred_labels = np.zeros(fracs_labels[0].shape[0]).astype(str)50 for i in range(fracs_labels[0].shape[0]):51 if adaptive_k[i] == 0:52 pred_labels[i] = '?'53 else:54 lst = [f[i, adaptive_k[i]] for f in fracs_labels]55 pred_labels[i] = distinct_labels[np.argmax(lst)]56 return np.array(pred_labels), np.array(adaptive_k)57 """58def predict_nn_rule(nbr_list_sorted, labels, margin=1.0):59 """60 Given matrix of ordered nearest neighbors for each point, returns AKNN's label predictions and adaptive neighborhood sizes.61 62 Parameters63 ----------64 nbr_list_sorted: array of shape (n_samples, n_neighbors)65 Indices of the `n_neighbors` nearest neighbors in the dataset, for each data point.66 labels: array of shape (n_samples)67 Dataset labels.68 69 margin: float70 The confidence parameter "A" from the AKNN paper.71 Returns72 -------73 pred_labels: array of shape (n_samples)74 AKNN label predictions on dataset.75 adaptive_ks: array of shape (n_samples)76 AKNN neighborhood sizes on dataset.77 78 emp_margins: array of shape (n_samples)79 Empirically calculated "advantage" of each point.80 """81 pred_labels = []82 adaptive_ks = []83 thresholds = margin/np.sqrt(np.arange(nbr_list_sorted.shape[1])+1)84 distinct_labels = np.unique(labels)85 for i in range(nbr_list_sorted.shape[0]):86 (pred_label, adaptive_k_ndx, _, emp_margin) = aknn(nbr_list_sorted[i,:], labels, thresholds)87 pred_labels.append(pred_label)88 adaptive_ks.append(adaptive_k_ndx + 1)89 emp_margins.append(emp_margin)90 return np.array(pred_labels), np.array(adaptive_ks), np.array(emp_margins)91def aknn(nbrs_arr, labels, thresholds, distinct_labels=['A','B','C','D','E','F','G','H','I','J']):92 """93 Apply AKNN rule for a query point, given its list of nearest neighbors.94 95 Parameters96 ----------97 nbrs_arr: array of shape (n_neighbors)98 Indices of the `n_neighbors` nearest neighbors in the dataset.99 labels: array of 
shape (n_samples)100 Dataset labels.101 102 thresholds: array of shape (n_neighbors)103 Bias thresholds at different neighborhood sizes.104 Returns105 -------106 pred_label: string107 AKNN label prediction.108 first_admissible_ndx: int109 n-1, where AKNN chooses neighborhood size n.110 111 fracs_labels: array of shape (n_labels, n_neighbors)112 Fraction of each label in balls of different neighborhood sizes.113 114 emp_margin: float115 Empirical "advantage" of the point, as specific by the AKNN paper.116 """117 query_nbrs = labels[nbrs_arr]118 mtr = np.stack([query_nbrs == i for i in distinct_labels])119 rngarr = np.arange(len(nbrs_arr))+1120 fracs_labels = np.cumsum(mtr, axis=1)/rngarr121 biases = fracs_labels - 1.0/len(distinct_labels)122 emp_margin = np.max(rngarr*biases*biases)123 numlabels_predicted = np.sum(biases > thresholds, axis=0)124 admissible_ndces = np.where(numlabels_predicted > 0)[0]125 first_admissible_ndx = admissible_ndces[0] if len(admissible_ndces) > 0 else len(nbrs_arr)126 # Break any ties between labels at stopping radius, by taking the most biased label127 pred_label = '?' if first_admissible_ndx == len(nbrs_arr) else distinct_labels[np.argmax(biases[:, first_admissible_ndx])]128 return (pred_label, first_admissible_ndx, fracs_labels, emp_margin)129def knn_rule(nbr_list_sorted, labels, k=10):130 """131 For benchmarking: given matrix of ordered nearest neighbors for each point, returns kNN rule's label predictions.132 133 Parameters134 ----------135 nbr_list_sorted: array of shape (n_samples, n_neighbors)136 Indices of the `n_neighbors` nearest neighbors in the dataset, for each data point.137 Returns138 -------139 array of shape (n_samples)140 Predictions of the k-NN rule for each data point.141 """142 toret = []143 for i in range(nbr_list_sorted.shape[0]):144 uq = np.unique(labels[nbr_list_sorted[i,:k]], return_counts=True)145 toret.append(uq[0][np.argmax(uq[1])])146 return np.array(toret)147def _calc_nbrs_exact(raw_data, k=1000, brute_force=False, use_nndescent=False, query_is_ref=True):148 """149 Calculate list of `k` exact Euclidean nearest neighbors for each point.150 151 Parameters152 ----------153 raw_data: array of shape (n_samples, n_features)154 Input dataset.155 Returns156 -------157 nbr_list_sorted: array of shape (n_samples, n_neighbors)158 Indices of the `n_neighbors` nearest neighbors in the dataset, for each data point.159 """160 if use_nndescent:161 index = pynndescent.NNDescent(raw_data, n_neighbors=k)162 indices, distances = index.neighbor_graph163 if query_is_ref:164 return indices[:, 1:]165 else:166 return indices167 if brute_force:168 a = sklearn.metrics.pairwise_distances(raw_data)169 nbr_list_sorted = np.argsort(a, axis=1)170 if query_is_ref:171 nbr_list_sorted = nbr_list_sorted[:, 1:]172 return nbr_list_sorted[:, :k]173 else:174 distances, indices = NearestNeighbors(n_neighbors=k+1).fit(raw_data).kneighbors(raw_data)175 if query_is_ref:176 return indices[:, 1:]177 else:...
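predict_nn_rule returns three arrays (predictions, adaptive neighborhood sizes, empirical margins), so aknn_predict does as well. A toy run on two synthetic Gaussian blobs, purely illustrative; the data, seed, and margin value here are assumptions, not taken from the paper:

import numpy as np

rng = np.random.RandomState(0)
ref_data = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 4.0])  # two well-separated blobs
labels = np.array(["A"] * 50 + ["B"] * 50)  # NumPy array, so labels[indices] fancy-indexing works

preds, adaptive_ks, emp_margins = aknn_predict(ref_data, labels, margin=1.0, max_k=20)
print(preds[:5], adaptive_ks[:5])  # predicted labels and chosen neighborhood sizes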

