How to use the distinct_labels method in hypothesis

Best Python code snippets using hypothesis

latent_plot_utils.py

Source: latent_plot_utils.py (GitHub)


import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from matplotlib import cm
from matplotlib.legend_handler import HandlerLine2D, HandlerTuple
import tikzplotlib
from os.path import join as pjoin

BASEPATH = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, BASEPATH)
sys.path.insert(0, pjoin(BASEPATH, '..'))
from py_utils import ensure_dirs


def distinct_labels_and_indices(labels):
    distinct_labels = list(set(labels))
    distinct_labels.sort()
    num_labels = len(distinct_labels)
    indices_i = {label: [] for label in distinct_labels}
    for i, label in enumerate(labels):
        indices_i[label].append(i)
    indices_i = {label: np.array(indices) for label, indices in indices_i.items()}
    return num_labels, distinct_labels, indices_i


def plot2D(data, labels, title):
    x_min, x_max = np.min(data, axis=0), np.max(data, axis=0)
    data = (data - x_min) / (x_max - x_min)
    fig, ax = plt.subplots(figsize=(8, 8))
    cjet = cm.get_cmap("jet")
    num_labels, distinct_labels, indices = distinct_labels_and_indices(labels)
    for i, label in enumerate(distinct_labels):
        index = indices[label]
        ax.scatter(data[index, 0], data[index, 1], label=label,
                   c=[cjet(1.0 * i / num_labels)], linewidths=0.)
    ax.legend(loc="center left", bbox_to_anchor=(1, 0, 1, 1),
              title=title.split('/')[-1])
    fig.tight_layout()
    tikzplotlib.save("%s.tex" % title, figure=fig, strict=True)
    plt.savefig("%s.png" % title)
    return fig


def plot2D_overlay(data_list, labels_list, alpha_list, title):
    x_min, x_max = np.array((1e9, 1e9)), np.array((-1e9, -1e9))
    for data in data_list:
        x_min = np.minimum(x_min, np.min(data, axis=0))
        x_max = np.maximum(x_max, np.max(data, axis=0))
    for i in range(len(data_list)):
        data_list[i] = (data_list[i] - x_min) / (x_max - x_min)
    fig, ax = plt.subplots(figsize=(8, 8))
    cjet = cm.get_cmap("jet")
    indices_list = []
    distinct_labels = []
    for labels in labels_list:
        _, cur_labels, indices = distinct_labels_and_indices(labels)
        indices_list.append(indices)
        for label in cur_labels:
            if label not in distinct_labels:
                distinct_labels.append(label)
    num_labels = len(distinct_labels)
    for i, label in enumerate(distinct_labels):
        res = 0.0
        for data, labels, indices, alpha in zip(data_list, labels_list, indices_list, alpha_list):
            if label in indices.keys():
                index = indices[label]
            else:
                index = np.array([], dtype=int)  # empty index must be integer-typed for fancy indexing
            c = cjet((1.0 * i + res) / (num_labels + 1))
            ax.scatter(data[index, 0], data[index, 1], label=label, c=[c], alpha=alpha, linewidths=0.)
            res += 0.3
    handles, labels = ax.get_legend_handles_labels()
    paired_handles = []
    handles_tot = len(handles) // 2
    for i in range(handles_tot):
        paired_handles.append((handles[i * 2], handles[i * 2 + 1]))
    ax.legend(handles=paired_handles, labels=distinct_labels, numpoints=1,
              handler_map={tuple: HandlerTuple(ndivide=None)},
              loc="center left", bbox_to_anchor=(1, 0, 1, 1),
              title=title.split('/')[-1])
    fig.tight_layout()
    tikzplotlib.save("%s.tex" % title, figure=fig, strict=True)
    plt.savefig("%s.png" % title)
    return fig


def plot2D_phase(data, labels, title):
    x_min, x_max = np.min(data, axis=0), np.max(data, axis=0)
    data = (data - x_min) / (x_max - x_min)
    figsize = (8, 8)
    add_width = 2
    new_width = figsize[0] + add_width
    fig = plt.figure(figsize=(new_width, figsize[1]))
    fac_l, fac_r = figsize[0] / new_width, add_width / new_width
    rect_l = [0.1, 0.1, 0.8, 0.8]
    rect_r = [0., 0.1, 0.2, 0.8]
    ax = fig.add_axes(np.array(rect_l) * np.array([fac_l, 1, fac_l, 1]))
    cax = fig.add_axes(np.array(rect_r) * np.array([fac_r, 1, fac_r, 1]) + np.array([fac_l, 0, 0, 0]))
    sin_labels = list(map(lambda l: np.sin(float(l)), labels))
    bla = ax.scatter(data[:, 0], data[:, 1], c=sin_labels, cmap="jet", alpha=1.0)
    # plt.colorbar(bla, cax=cax) <- some problem with the color bar..
    # fig.tight_layout()
    tikzplotlib.save("%s.tex" % title, figure=fig, strict=True)
    plt.savefig("%s.png" % title)
    return fig


tsne = None
def calc_tsne(raw):
    global tsne
    if tsne is None:
        tsne = TSNE(n_components=2, init='pca', random_state=7)  # n_iter = xxx
    result = tsne.fit_transform(raw)
    return result


pca = None
def calc_pca(raw):
    global pca
    if pca is None:
        pca = PCA(n_components=2)
    return pca.fit_transform(raw)


def calc_pca_curve(raw):
    pcan = PCA()
    pcan.fit_transform(raw)
    pct = pcan.explained_variance_ratio_
    prefix = np.cumsum(pct / np.sum(pct))
    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_axes([0.2, 0.2, 0.6, 0.6])
    ax.plot(list(range(1, 6)), prefix[:5])
    ax.plot(2, prefix[1], "ro")
    ax.annotate("{:.3f}% of variation".format(prefix[1] * 100),
                (2, prefix[1]),
                textcoords="offset points",
                xytext=(60, -20),
                ha="center")
    ax.set_xticks(list(range(1, 6)))
    ax.set_yticks(list(np.arange(0.5, 1.01, 0.1)))
    ax.set_xlabel("number of components")
    ax.set_ylabel("explained variance ratio")
    name = "pca_curve"
    tikzplotlib.save(name + ".tex", figure=fig, strict=True)
    plt.savefig("pca_curve.png")
    return pct


def plot_tsne(raw, labels, title):
    result = calc_tsne(raw)
    return plot2D(result, labels, title)


def plot_content_tsne(raw, slabels, clabels, title):
    name = title + "_tsne"
    path = name + ".npz"
    if os.path.exists(path):
        print("%s already exists" % path)
        result = np.load(path, allow_pickle=True)["result"]
    else:
        print("start to produce %s" % path)
        result = calc_tsne(raw)
        np.savez_compressed(name, result=result)
    plot2D(result, slabels, title + "_style_labels")
    plot2D(result, clabels, title + "_content_labels")


def calc_many_blas(raws, calc_single):
    lens = list(map(lambda x: len(x), raws))
    whole = np.concatenate(raws, axis=0)
    proj = calc_single(whole)
    ret = ()
    suml = 0
    for l in lens:
        ret += (proj[suml: suml + l],)
        suml += l
    return ret


def get_all_plots(data, output_path, writers, iter, summary=True,
                  style_cluster_protocols=('pca',),  # one-element tuple, not a bare string
                  separate_compute=False):
    """
    data: {"train": dict_train, "test": dict_test}
    dict_train: {"style2d_code": blabla, etc.}
    separate_compute: compute t-SNE for 2D & 3D separately
    """
    ensure_dirs(output_path)

    def fig_title(title):
        return pjoin(output_path, title)

    def add_fig(fig, title, phase):
        if summary:
            writers[phase].add_figure(title, fig, global_step=iter)

    keys = data["train"].keys()
    has2d = "style2d_code" in keys
    has3d = "style3d_code" in keys
    # style codes & adain params
    for suffix in ["_code", "_adain"]:
        codes_raw = []
        titles = []
        phases = []
        data_keys = []
        if has2d: data_keys.append("style2d" + suffix)
        if has3d: data_keys.append("style3d" + suffix)
        for key in data_keys:
            for phase in ["train", "test"]:
                codes_raw.append(data[phase][key])
                titles.append(f'{phase}_{key}')
                phases.append(phase)
        # calc tsne with style2/3d, train/test altogether
        for name, protocol in zip(['pca', 'tsne'], [calc_pca, calc_tsne]):
            if name not in style_cluster_protocols:
                continue
            style_codes = calc_many_blas(codes_raw, protocol)
            fig = plot2D_overlay([style_codes[0], style_codes[2]],
                                 [data["train"]["meta"]["style"], data["train"]["meta"]["style"]],
                                 [1.0, 0.5],
                                 fig_title(f'joint_embedding_{name}{suffix}'))
            add_fig(fig, f'joint_embedding_{name}{suffix}', "train")
            for i, (code, phase, title) in enumerate(zip(style_codes, phases, titles)):
                if separate_compute:
                    code = protocol(codes_raw[i])
                for label_type in ["style", "content"]:
                    fig = plot2D(code, data[phase]["meta"][label_type], fig_title(f'{title}_{name}_{label_type}'))
                    add_fig(fig, f'{title}_{name}_{label_type}', phase)
    # content codes (train only)
    content_code_pca = calc_pca(data["train"]["content_code"])
    for label in ["style", "content", "phase"]:
        if label == "phase":
            indices = [i for i in range(len(data["train"]["meta"]["content"])) if data["train"]["meta"]["content"][i] == "walk"]
            walk_code = content_code_pca[np.array(indices)]
            phase_labels = [data["train"]["meta"]["phase"][i] for i in indices]
            fig = plot2D_phase(walk_code, phase_labels, fig_title(f'content_by_{label}'))
        else:
            fig = plot2D(content_code_pca, data["train"]["meta"][label], fig_title(f'content_by_{label}'))
        add_fig(fig, f'content_by_{label}', "train")
    """
    fig = show_images_from_disk("", all_titles, 2, output_path + "all_codes")
    if summary:
        writers["train"].add_figure("all codes", fig, global_step=iter)
    """


def get_demo_plots(data, output_path):
    """
    data: {"train": dict_train, "test": dict_test}
    dict_train: {"style2d_code": blabla, etc.}
    """
    ensure_dirs(output_path)

    def fig_title(title):
        return pjoin(output_path, title)

    style_labels = data["train"]["meta"]["style"]
    adain_raw = []
    for key in ["style2d_adain", "style3d_adain"]:
        for phase in ["train", "test"]:
            adain_raw.append(data[phase][key])
    adain_tsne = calc_many_blas(adain_raw, calc_tsne)
    plot2D_overlay([adain_tsne[0], adain_tsne[2]],
                   [style_labels, style_labels],
                   [1.0, 0.5],
                   fig_title(f'joint_embedding_adain_tsne'))
    for key in ["style3d_code", "style3d_adain"]:
        tsne_code = calc_tsne(data["train"][key])
        plot2D(tsne_code, style_labels, fig_title(f'{key}_tsne'))
    content_code_pca = calc_pca(data["train"]["content_code"])
    indices = [i for i in range(len(data["train"]["meta"]["content"])) if data["train"]["meta"]["content"][i] == "walk"]
    walk_code = content_code_pca[np.array(indices)]
    phase_labels = [data["train"]["meta"]["phase"][i] for i in indices]
    plot2D_phase(walk_code, phase_labels, fig_title(f'content_by_phase'))
    plot2D(content_code_pca, style_labels, fig_title(f'content_by_style'))


def show_images_from_disk(path, titles, rows, this_title):
    images = []
    for title in titles:
        name = "%s.png" % title
        input_path = os.path.join(path, name)
        images.append(plt.imread(input_path))
    this_title = os.path.join(path, this_title)
    return show_images(images, titles, this_title, rows)


def show_images(images, titles, this_title, rows=1):
    """Display a list of images in a single figure with matplotlib.

    Parameters
    ----------
    images: List of np.arrays compatible with plt.imshow.
    rows (Default = 1): Number of rows in figure (number of columns is
        set to np.ceil(n_images/float(rows))).
    titles: List of titles corresponding to each image. Must have
        the same length as images.
    """
    assert (len(images) == len(titles))
    n_images = len(images)
    cols = int(np.ceil(n_images / float(rows)))  # add_subplot expects an int
    # if titles is None: titles = ['Image (%d)' % i for i in range(1, n_images + 1)]
    size = np.array((8, 8)) * np.array((cols, rows))  # (width, height) in inches
    fig = plt.figure(figsize=size)
    for n, (image, title) in enumerate(zip(images, titles)):
        a = fig.add_subplot(rows, cols, n + 1)
        if image.ndim == 2:
            plt.gray()
        a.set_axis_off()
        plt.imshow(image)
        a.set_title(title)
    fig.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.subplots_adjust(wspace=0, hspace=0)
    # plt.show()
    plt.savefig("%s.png" % this_title, dpi=150, bbox_inches='tight', pad_inches=0)


MyKNeighborsClassifier.py

Source: MyKNeighborsClassifier.py (GitHub)


#!/usr/bin/env python
# -*- coding: utf-8 -*-
from scipy.spatial import distance


class MyKNeighborsClassifier:
    """Classifier implementing the k-nearest neighbors vote, similar to the
    sklearn library but different.
    https://goo.gl/Cmji3U
    But still the same.

    Parameters
    ----------
    n_neighbors : int, optional (default = 5)
        Number of neighbors to use by default.
    method : string, optional (default = 'classical')
        Method for voting. Possible values:
        - 'classical' : uniform weights. All points in each neighborhood
          are weighted equally.
        - 'weighted' : weight points by the inverse of their distance.
          In this case, closer neighbors of a query point will have a
          greater influence than neighbors which are further away.
        - 'validity' : weights are calculated from distance and multiplied
          by the validity of each voter.
        Note: implementing kd_tree is a bonus.
    norm : {'l1', 'l2'}, optional (default = 'l2')
        Distance norm. 'l1' is Manhattan distance, 'l2' is Euclidean distance.

    Examples
    --------
    """

    def __init__(self, n_neighbors=5, method='classical', norm='l2'):
        self.n_neighbors = n_neighbors
        self.method = method
        self.norm = norm
        self.labels = []
        self.distinct_labels = []
        self.data = []

    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),
            Training data.
        y : array-like, shape = [n_samples]
            Target values.
        """
        self.data = X
        self.labels = y
        self.distinct_labels = sorted(list(set(y)))  # get distinct classes
        if self.method == "validity":
            self.validities = []
            # calculate validities for each sample
            for sample_i, sample in enumerate(self.data):
                distances = []
                if self.norm == "l1":
                    # apply Manhattan distance
                    for i, x in enumerate(self.data):
                        if x != sample:
                            # store [distance, label] pairs in distances list
                            distances.append([distance.cityblock(x, sample), self.labels[i]])
                elif self.norm == "l2":
                    # apply Euclidean distance
                    for i, x in enumerate(self.data):
                        if x != sample:
                            # store [distance, label] pairs in distances list
                            distances.append([distance.euclidean(x, sample), self.labels[i]])
                # get n nearest neighbors, sorted wrt distance
                nearest_neighbors = sorted(distances, key=lambda x: x[0])[:self.n_neighbors]
                # store label weights wrt neighbors for each label
                label_weights = [0.0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    label_weights[label_index] += (1.0 / (neighbor[0] + 1e-15))
                sample_label = self.labels[sample_i]
                validity = label_weights[self.distinct_labels.index(sample_label)] / sum(label_weights)
                self.validities.append(validity)

    def predict(self, X):
        """Predict the class labels for the provided data

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),
            Test samples.

        Returns
        -------
        y : array of shape [n_samples]
            Class labels for each data sample.
        """
        if len(self.labels) == 0:
            raise ValueError("You should fit first!")

        y = []  # store the label for each data sample
        for x_i, x in enumerate(X):
            # find distance of x to all training data
            distances = []
            if self.norm == "l1":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.cityblock(x, data), self.labels[i], i])
            elif self.norm == "l2":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.euclidean(x, data), self.labels[i], i])
            # get n nearest neighbors, sorted wrt distance
            nearest_neighbors = sorted(distances, key=lambda x: x[0])[:self.n_neighbors]

            votes = [0] * self.n_neighbors  # store vote (label) of each nearest neighbor
            if self.method == "classical":
                for i, neighbor in enumerate(nearest_neighbors):
                    votes[i] = neighbor[1]
                y.append(max(votes, key=votes.count))
            elif self.method == "weighted":
                weights = []  # store weight of each neighbor: 1/(distance + 1e-15)
                for i, neighbor in enumerate(nearest_neighbors):
                    weights.append(1.0 / (neighbor[0] + 1e-15))
                # total weight for each label, indexed as in self.distinct_labels
                total_weights = [0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    total_weights[label_index] += weights[i]
                y.append(self.distinct_labels[total_weights.index(max(total_weights))])
            elif self.method == "validity":
                validities = []  # store weight * validity of each neighbor
                for i, neighbor in enumerate(nearest_neighbors):
                    validities.append((1.0 / (neighbor[0] + 1e-15)) * self.validities[neighbor[2]])
                # total validity*weight for each label, indexed as in self.distinct_labels
                total_validities = [0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    total_validities[label_index] += validities[i]
                y.append(self.distinct_labels[total_validities.index(max(total_validities))])
        return y

    def predict_proba(self, X, method=None):
        """Return probability estimates for the test data X.

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),
            Test samples.
        method : string, if None uses self.method.

        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class probabilities of the input samples. Classes are ordered
            by lexicographic order.
        """
        if method is None:
            method = self.method
        # probabilities of each class for each sample; build a fresh row per
        # sample so the rows do not alias one shared list
        p = [[0.0] * len(self.distinct_labels) for _ in range(len(X))]

        for x_i, x in enumerate(X):
            # find distance of x to all training data
            distances = []
            if self.norm == "l1":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.cityblock(x, data), self.labels[i], i])
            elif self.norm == "l2":
                for i, data in enumerate(self.data):
                    # store [distance, label, data_index] pairs in distances list
                    distances.append([distance.euclidean(x, data), self.labels[i], i])
            # get n nearest neighbors, sorted wrt distance
            nearest_neighbors = sorted(distances, key=lambda x: x[0])[:self.n_neighbors]

            classes = [0] * len(self.distinct_labels)  # how many neighbors fall in each class

            if method == "classical":
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    classes[label_index] += 1
                for class_i in range(len(classes)):
                    p[x_i][class_i] = float(classes[class_i]) / self.n_neighbors
            elif method == "weighted":
                weights = []  # weight of each neighbor: 1/(distance + 1e-15)
                for i, neighbor in enumerate(nearest_neighbors):
                    weights.append(1.0 / (neighbor[0] + 1e-15))
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    classes[label_index] += weights[i]
                for class_i in range(len(classes)):
                    p[x_i][class_i] = float(classes[class_i]) / self.n_neighbors
            elif method == "validity":
                validities = []  # weight * validity of each neighbor
                for i, neighbor in enumerate(nearest_neighbors):
                    validities.append((1.0 / (neighbor[0] + 1e-15)) * self.validities[neighbor[2]])
                # total validity*weight for each label, indexed as in self.distinct_labels
                class_validities = [0] * len(self.distinct_labels)
                for i, neighbor in enumerate(nearest_neighbors):
                    label_index = self.distinct_labels.index(neighbor[1])
                    class_validities[label_index] += validities[i]
                for valid_i in range(len(class_validities)):
                    p[x_i][valid_i] = float(class_validities[valid_i]) / self.n_neighbors
            # normalize probability list for each sample
            norm = sum(p[x_i])
            p[x_i] = [round(j / norm, 8) for j in p[x_i]]
        return p


if __name__ == '__main__':
    X = [[0], [1], [2], [3]]
    y = [0, 0, 1, 1]
    neigh = MyKNeighborsClassifier(n_neighbors=3, method="validity")
    neigh.fit(X, y)

    print(neigh.predict([[1.1]]))  # , [6.7], [5], [1.9], [0]])
    n = 0.9
    print(neigh.predict_proba([[n]], method='classical'))
    # [[0.66666667 0.33333333]]
    print(neigh.predict_proba([[n]], method='weighted'))
    # [[0.92436975 0.07563025]]
    print(neigh.predict_proba([[n]], method='validity'))
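Because predict_proba orders its columns by the sorted distinct_labels list built in fit, it helps to print that list when reading the probabilities. A small usage sketch; the data and class names below are chosen arbitrarily for illustration:

X = [[0.0], [0.5], [2.5], [3.0]]
y = ["a", "a", "b", "b"]
clf = MyKNeighborsClassifier(n_neighbors=3, method="weighted", norm="l2")
clf.fit(X, y)
print(clf.distinct_labels)          # ['a', 'b'] -- column order of predict_proba
print(clf.predict([[0.4], [2.8]]))  # ['a', 'b']
print(clf.predict_proba([[0.4]]))   # probabilities in ['a', 'b'] order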


aknn_alg.py

Source: aknn_alg.py (GitHub)


1"""2Code for the AKNN classification rule from:3An adaptive nearest neighbor rule for classification4Akshay Balsubramani, Sanjoy Dasgupta, Yoav Freund, Shay Moran5https://arxiv.org/abs/1905.127176Author: Akshay Balsubramani7"""8import numpy as np, sklearn, time9import sklearn.metrics10from sklearn.neighbors import NearestNeighbors11import pynndescent12def aknn_predict(13 ref_data, 14 labels, 15 margin=1.0, 16 query_data=None, 17 max_k=100, 18 use_nndescent=False19):20 # itime = time.time()21 self_is_data = False22 if query_data is None:23 query_data = ref_data24 self_is_data = True25 26 itime = time.time()27 nbrs_list = _calc_nbrs_exact(ref_data, k=max_k, use_nndescent=False)28 print('Neighbor indices computed. Time:\t {}'.format(time.time() - itime))29 aknn_predictions = predict_nn_rule(nbrs_list, labels)30 print('AKNN predictions made. Time:\t {}'.format(time.time() - itime))31 return aknn_predictions32 """33 nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=max_k).fit(ref_data)34 if use_nndescent:35 index = pynndescent.NNDescent(raw_data, n_neighbors=k)36 indices, distances = index.neighbor_graph37 else:38 distances, indices = nbrs.kneighbors(query_data)39 if self_is_data:40 indices = indices[:, 1:]41 distinct_labels = np.unique(labels)42 rngarr = np.arange(indices.shape[1])+143 query_nbrs = labels[indices]44 fracs_labels = [np.cumsum(query_nbrs == i, axis=1)/rngarr for i in distinct_labels]45 46 thresholds = margin/np.sqrt(np.arange(indices.shape[1]) + 1)47 numlabels_predicted = np.add.reduce([f > (thresholds + 1.0/len(distinct_labels)) for f in fracs_labels])48 adaptive_k = np.argmax(numlabels_predicted > 0, axis=1)49 pred_labels = np.zeros(fracs_labels[0].shape[0]).astype(str)50 for i in range(fracs_labels[0].shape[0]):51 if adaptive_k[i] == 0:52 pred_labels[i] = '?'53 else:54 lst = [f[i, adaptive_k[i]] for f in fracs_labels]55 pred_labels[i] = distinct_labels[np.argmax(lst)]56 return np.array(pred_labels), np.array(adaptive_k)57 """58def predict_nn_rule(nbr_list_sorted, labels, margin=1.0):59 """60 Given matrix of ordered nearest neighbors for each point, returns AKNN's label predictions and adaptive neighborhood sizes.61 62 Parameters63 ----------64 nbr_list_sorted: array of shape (n_samples, n_neighbors)65 Indices of the `n_neighbors` nearest neighbors in the dataset, for each data point.66 labels: array of shape (n_samples)67 Dataset labels.68 69 margin: float70 The confidence parameter "A" from the AKNN paper.71 Returns72 -------73 pred_labels: array of shape (n_samples)74 AKNN label predictions on dataset.75 adaptive_ks: array of shape (n_samples)76 AKNN neighborhood sizes on dataset.77 78 emp_margins: array of shape (n_samples)79 Empirically calculated "advantage" of each point.80 """81 pred_labels = []82 adaptive_ks = []83 thresholds = margin/np.sqrt(np.arange(nbr_list_sorted.shape[1])+1)84 distinct_labels = np.unique(labels)85 for i in range(nbr_list_sorted.shape[0]):86 (pred_label, adaptive_k_ndx, _, emp_margin) = aknn(nbr_list_sorted[i,:], labels, thresholds)87 pred_labels.append(pred_label)88 adaptive_ks.append(adaptive_k_ndx + 1)89 emp_margins.append(emp_margin)90 return np.array(pred_labels), np.array(adaptive_ks), np.array(emp_margins)91def aknn(nbrs_arr, labels, thresholds, distinct_labels=['A','B','C','D','E','F','G','H','I','J']):92 """93 Apply AKNN rule for a query point, given its list of nearest neighbors.94 95 Parameters96 ----------97 nbrs_arr: array of shape (n_neighbors)98 Indices of the `n_neighbors` nearest neighbors in the dataset.99 labels: array of 
shape (n_samples)100 Dataset labels.101 102 thresholds: array of shape (n_neighbors)103 Bias thresholds at different neighborhood sizes.104 Returns105 -------106 pred_label: string107 AKNN label prediction.108 first_admissible_ndx: int109 n-1, where AKNN chooses neighborhood size n.110 111 fracs_labels: array of shape (n_labels, n_neighbors)112 Fraction of each label in balls of different neighborhood sizes.113 114 emp_margin: float115 Empirical "advantage" of the point, as specific by the AKNN paper.116 """117 query_nbrs = labels[nbrs_arr]118 mtr = np.stack([query_nbrs == i for i in distinct_labels])119 rngarr = np.arange(len(nbrs_arr))+1120 fracs_labels = np.cumsum(mtr, axis=1)/rngarr121 biases = fracs_labels - 1.0/len(distinct_labels)122 emp_margin = np.max(rngarr*biases*biases)123 numlabels_predicted = np.sum(biases > thresholds, axis=0)124 admissible_ndces = np.where(numlabels_predicted > 0)[0]125 first_admissible_ndx = admissible_ndces[0] if len(admissible_ndces) > 0 else len(nbrs_arr)126 # Break any ties between labels at stopping radius, by taking the most biased label127 pred_label = '?' if first_admissible_ndx == len(nbrs_arr) else distinct_labels[np.argmax(biases[:, first_admissible_ndx])]128 return (pred_label, first_admissible_ndx, fracs_labels, emp_margin)129def knn_rule(nbr_list_sorted, labels, k=10):130 """131 For benchmarking: given matrix of ordered nearest neighbors for each point, returns kNN rule's label predictions.132 133 Parameters134 ----------135 nbr_list_sorted: array of shape (n_samples, n_neighbors)136 Indices of the `n_neighbors` nearest neighbors in the dataset, for each data point.137 Returns138 -------139 array of shape (n_samples)140 Predictions of the k-NN rule for each data point.141 """142 toret = []143 for i in range(nbr_list_sorted.shape[0]):144 uq = np.unique(labels[nbr_list_sorted[i,:k]], return_counts=True)145 toret.append(uq[0][np.argmax(uq[1])])146 return np.array(toret)147def _calc_nbrs_exact(raw_data, k=1000, brute_force=False, use_nndescent=False, query_is_ref=True):148 """149 Calculate list of `k` exact Euclidean nearest neighbors for each point.150 151 Parameters152 ----------153 raw_data: array of shape (n_samples, n_features)154 Input dataset.155 Returns156 -------157 nbr_list_sorted: array of shape (n_samples, n_neighbors)158 Indices of the `n_neighbors` nearest neighbors in the dataset, for each data point.159 """160 if use_nndescent:161 index = pynndescent.NNDescent(raw_data, n_neighbors=k)162 indices, distances = index.neighbor_graph163 if query_is_ref:164 return indices[:, 1:]165 else:166 return indices167 if brute_force:168 a = sklearn.metrics.pairwise_distances(raw_data)169 nbr_list_sorted = np.argsort(a, axis=1)170 if query_is_ref:171 nbr_list_sorted = nbr_list_sorted[:, 1:]172 return nbr_list_sorted[:, :k]173 else:174 distances, indices = NearestNeighbors(n_neighbors=k+1).fit(raw_data).kneighbors(raw_data)175 if query_is_ref:176 return indices[:, 1:]177 else:...
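predict_nn_rule returns three arrays (predictions, adaptive neighborhood sizes, empirical margins), so aknn_predict does as well. A toy run on two synthetic Gaussian blobs, purely illustrative; the data, seed, and margin value here are assumptions, not taken from the paper:

import numpy as np

rng = np.random.RandomState(0)
ref_data = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 4.0])  # two well-separated blobs
labels = np.array(["A"] * 50 + ["B"] * 50)  # NumPy array, so labels[indices] fancy-indexing works

preds, adaptive_ks, emp_margins = aknn_predict(ref_data, labels, margin=1.0, max_k=20)
print(preds[:5], adaptive_ks[:5])  # predicted labels and chosen neighborhood sizes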

