How to use the is_text method in avocado

Best Python code snippets using avocado_python
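None of the snippets below come from the Avocado framework itself; each one defines or exercises an is_text helper or attribute of its own. As a rough orientation, an is_text-style check can be driven from an Avocado test like the sketch below. This is a minimal illustration: the trim module and its is_text function are borrowed from the third snippet on this page and are assumptions here, not part of Avocado's API; the README.rst path is a placeholder.

# Minimal sketch of an Avocado test exercising an is_text-style helper.
# `trim.is_text` is assumed importable as in the test_trim.py snippet below.
import os
import sys

from avocado import Test

import trim  # hypothetical: the module under test from the third snippet


class IsTextCheck(Test):
    """Checks a text file and a binary against an is_text-style helper."""

    def test_readme_is_text(self):
        # A plain reStructuredText file should be classified as text.
        readme = os.path.join(os.path.dirname(__file__), 'README.rst')
        self.assertTrue(trim.is_text(readme))

    def test_interpreter_is_not_text(self):
        # The running Python interpreter is a binary, not text.
        self.assertFalse(trim.is_text(sys.executable))

Because avocado.Test builds on unittest.TestCase, the familiar assertTrue/assertFalse assertions are available inside the test methods.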

mhs_layout_analisys.py

Source: mhs_layout_analisys.py (GitHub)


import cv2
import numpy as np
from utils import conditional_save, get_conditional_path


def cc_analisys(img) -> 'tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]':
    '''Find connected components and extract features from them.
    Get the connected components and their: area, density, bounding box, inner
    CCs and height/width rate.
    Args:
        img (cv2 image): inverse binary image.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: area,
        density, bounding box, number of inner CCs and height/width rate for each
        connected component.
    '''
    n, _, cc, _ = cv2.connectedComponentsWithStats(img, connectivity=8, ltype=cv2.CV_32S)
    ### Connected component analysis
    area = np.zeros(n, dtype=int)
    density = np.zeros(n, dtype=float)
    rect = np.zeros((n, 4), dtype=int)
    inc = np.zeros(n, dtype=int)
    hw_rate = np.zeros(n)
    for i in range(1, n):
        h = cc[i, cv2.CC_STAT_HEIGHT]
        w = cc[i, cv2.CC_STAT_WIDTH]
        area[i] = cc[i, cv2.CC_STAT_AREA]
        density[i] = area[i] / (w*h)
        hw_rate[i] = min(w, h) / max(w, h)
        rect[i, [0,1]] = cc[i, [cv2.CC_STAT_LEFT, cv2.CC_STAT_TOP]]
        rect[i, [2,3]] = [w, h]
    for i in range(1, n):
        contained = (rect[:, 0] >= rect[i, 0]) & (rect[:, 0] + rect[:, 2] <= rect[i, 0] + rect[i, 2]) & (rect[:, 1] >= rect[i, 1]) & (rect[:, 1] + rect[:, 3] <= rect[i, 1] + rect[i, 3])
        contained[i] = False
        contained = contained & (area >= area[i] * 0.05)
        inc[i] = contained.sum()

    return area, density, rect, inc, hw_rate


def heuristic_filter(img, area: np.ndarray, density: np.ndarray, rect: np.ndarray, inc: np.ndarray, hw_rate: np.ndarray) -> 'tuple[np.ndarray, np.ndarray]':
    '''Apply a heuristic filter to remove non-text elements from an image.
    Use the heuristic filter defined by (Tran et al. 2017) to identify and
    remove non-text elements from an image.
    Args:
        img (cv2 image): inverse binary image
        area (np.ndarray): areas (number of pixels) of the CCs
        density (np.ndarray): density of the CCs
        rect (np.ndarray): bounding boxes of the CCs
        inc (np.ndarray): number of contained CCs
        hw_rate (np.ndarray): height/width rate of the CCs
    Returns:
        tuple[np.ndarray, np.ndarray]: the image without the non-text elements,
        and a boolean mask for the text CCs.
    '''
    is_text = np.full(rect.shape[0], True, dtype=bool)
    is_text = is_text & (rect[:, 0] > 0)
    is_text = is_text & (rect[:, 1] > 0)
    is_text = is_text & (rect[:, 0] + rect[:, 2] < img.shape[1])
    is_text = is_text & (rect[:, 1] + rect[:, 3] < img.shape[0])
    is_text = is_text & (inc <= 4)
    is_text = is_text & (area >= 20)
    is_text = is_text & ~((hw_rate < 0.1))  # & (rect[:, 3] < rect[:, 2]))
    is_text = is_text & (density >= 0.06)
    # is_text = is_text & (density <= 0.9)
    out = img.copy() * 0
    for x,y,w,h in rect[is_text]:
        out[y:y+h, x:x+w] = img[y:y+h, x:x+w]
    return out, is_text


def get_gradient(R, s: int, axis: int = 1, t: int = 0) -> np.ndarray:
    '''Calculate the gradient for the projection on the image.
    Using the method outlined in (Tran et al. 2016), calculate the gradient of
    the horizontal (axis=0) or vertical (axis=1) projection.
    Args:
        R (cv2 image): region to calculate the gradient for
        s (int): smoothing parameter; window to smooth the projection
        axis (int): axis to project
        t (int): maximum number of pixels in a row to consider the row black
    Returns:
        np.ndarray: gradient of the projection
    '''
    ph = np.sum(R > 0, axis)
    ph[ph<t] = 0
    zh = np.zeros_like(ph)
    # s = int(ph.shape[0] * 0.05)
    for x in range(zh.shape[0]):
        i = max(x - s, 0)
        j = min(x + s, zh.shape[0])
        zh[x] = np.floor(np.sum(ph[i:j] / (2*s)))
    if zh.shape[0] < 2:
        return np.array([0])
    gh = np.round(np.gradient(zh, edge_order=1)).astype(int)

    return gh


def check_homogeneity(R, s: int, axis: int = 1, t: int = 0) -> bool:
    '''Check if a region is homogeneous.
    Using the method outlined in (Tran et al. 2016), calculate the homogeneity
    structure of the region.
    Args:
        R (cv2 image): region to calculate the gradient for
        s (int): smoothing parameter; window to smooth the projection
        axis (int): axis to project
        t (int): maximum number of pixels in a row to consider the row black
    Returns:
        bool: whether the region is homogeneous
    '''
    gh = get_gradient(R, s, axis, t=t)
    lh = [t for t in range(gh.shape[0]-1) if (gh[t] < 0 and gh[t+1] >= 0) or (gh[t] > 0 and gh[t+1] <= 0)]
    delta = np.array([lh[i+1] - lh[i] for i in range(len(lh)-1)])
    if delta.shape[0] > 0:
        v = np.var(delta)
        return v <= 50

    return True


def get_lines(R, axis: int, t: int = 0) -> 'tuple[tuple[list[int], list[int]], tuple[list[int], list[int]]]':
    '''Find the black and white lines of a region.
    Use the horizontal or vertical projection to find black lines and white
    lines in the region, respecting the threshold.
    Args:
        R (cv2 image): region to find the lines
        axis (int): axis to project
        t (int): maximum number of pixels in a row to consider the row a white line

    Returns:
        tuple[tuple[list[int], list[int]], tuple[list[int], list[int]]]: index
        and heights of the white lines and black lines found.
    '''
    p = np.sum(R > 0, axis=axis)
    flags = np.zeros_like(p, dtype=bool)
    heights = np.zeros_like(p)
    prev = p[0]
    # flag = True -> black line
    flags = p > t
    heights[0] = 1

    for i in range(1, p.shape[0]):
        if (p[i] <= t and prev <= t) or (p[i] > t and prev > t):
            heights[i] = heights[i-1] + 1
        else:
            heights[i] = 1

        prev = p[i]

    white = []
    black = []
    white_heights = []
    black_heights = []
    bounds = [b for b in np.argwhere(heights == 1).flatten()] + [heights.shape[0]]
    for b in range(len(bounds) - 1):
        start, end = bounds[b], bounds[b+1]
        if flags[start]:
            black.append((end + start) // 2)
            black_heights.append(np.max(heights[start:end]))
        else:
            white.append((end + start) // 2)
            white_heights.append(np.max(heights[start:end]))

    return (white, white_heights), (black, black_heights)


def find_last_before(white: 'list[int]', x: int) -> int:
    '''Find the last white line before a certain position.
    Args:
        white (list[int]): list of white lines
        x (int): position

    Returns:
        int: the index for the last white line before x, -1 if no white line exists before x
    '''
    k = -1
    for i in range(len(white)):
        if white[i] < x:
            k = i
        else:
            break
    return k


def get_division(R, axis: int, t: int = 0) -> 'list[tuple[int, int]]':
    '''Calculate the positions to divide the region.
    Use the height of black and white lines in the region to calculate the cutting point.
    Args:
        R (cv2 image): region to find the lines
        axis (int): axis to project
        t (int): maximum number of pixels in a row to consider the row a white line
    Returns:
        list[tuple[int, int]]: list of cuts to make along the specified axis
    '''
    (white, white_heights), (black, black_heights) = get_lines(R, axis, t)

    wi = np.argwhere((white_heights == np.max(white_heights)) & (white_heights > np.median(white_heights))).flatten() if len(white) > 0 else np.array([])
    bi = np.argwhere((black_heights == np.max(black_heights)) & (black_heights > np.median(black_heights))).flatten() if len(black) > 0 else np.array([])
    div = []
    wdiv = []
    bdiv = []
    if wi.shape[0] > 0:  # white division
        prev = 0
        for w in wi:
            wdiv.append((prev, white[w] - white_heights[w] // 2))
            prev = white[w] + white_heights[w] // 2
        wdiv.append((prev, R.shape[1-axis]))
    if bi.shape[0] > 0:  # black division
        prev = 0
        for b in bi:
            i = find_last_before(white, black[b])
            if i != -1:
                first = white[i]
                second = white[i+1] if i+1 < len(white) else first
                first = white[b] if b < len(white) else white[-1]
                second = white[b+1] if b+1 < len(white) else white[-1]
                if first == second:
                    bdiv.append((prev, first - white_heights[i] // 2))
                    prev = first + white_heights[i] // 2
                else:
                    bdiv.append((prev, first - white_heights[i] // 2))
                    bdiv.append((first + white_heights[i] // 2, second - white_heights[i+1] // 2))
                    prev = second
        if prev > 0:
            bdiv.append((prev, R.shape[1-axis]))

    divs = []
    for d in wdiv + bdiv:
        divs.extend(d)
    divs = sorted(list(set(divs)))  # remove duplicates and sort
    divs = [(divs[i], divs[i+1]) for i in range(len(divs)-1)]

    return divs


def recursive_splitting(img, rect: np.ndarray, is_text: np.ndarray, area: np.ndarray, t: float = 0.01, do_filter: bool = True) -> 'tuple[list, list[np.ndarray]]':
    '''Split an image into homogeneous regions.
    Use the method described by (Tran et al. 2016) to split the image into
    multiple homogeneous regions.
    Args:
        img (cv2 image): the image to split
        rect (np.ndarray): bounding boxes of all the CCs
        is_text (np.ndarray): boolean mask for the text CCs
        area (np.ndarray): area (number of filled pixels) for each CC
        t (float): the threshold of pixels to ignore when computing homogeneity
        do_filter (bool): whether to execute the recursive filter when splitting.
    Returns:
        tuple[list, list[np.ndarray]]: list of regions and their coordinates on the original image.
    '''
    finished_regions = []
    finished_coords = []
    regions = [img]
    coords = [(0, 0, img.shape[1], img.shape[0])]
    all_coords = [coords[0]]
    new_regions = [0]
    while len(new_regions) > 0:
        new_regions = []
        new_homo = []
        new_coords = []
        for i in range(len(regions)):
            # print('in', coords[i])
            x, y, w, h = coords[i]
            # s = int(np.sqrt(w*h) * 0.05)
            homo = check_homogeneity(regions[i], int(w*0.05), 0, int(w*t)) and check_homogeneity(regions[i], int(h*0.05), 1, int(h*t))
            if homo:
                # print('homo!')
                finished_regions.append(regions[i])
                finished_coords.append(coords[i])
            else:
                hdivs = get_division(regions[i], 1, int(w * t))
                vdivs = get_division(regions[i], 0, int(h * t))
                divs = []
                for h in hdivs:
                    for v in vdivs:
                        x1, x2 = min(v[0], v[1]), max(v[0], v[1])
                        y1, y2 = min(h[0], h[1]), max(h[0], h[1])
                        divs.append((x1, x2, y1, y2))
                # print('got', len(divs), 'divisions')

                for x1,x2,y1,y2 in divs:
                    rct = (x+x1, y+y1, x2-x1, y2-y1)
                    if x2-x1 > 3 and y2-y1 > 3 and rct not in all_coords:
                        # print('found', rct)
                        if do_filter:
                            filtered = regions[i][y1:y2, x1:x2].copy()
                            recursive_filter(filtered, rct, rect, is_text, area)
                            if converge(regions[i][y1:y2, x1:x2], filtered):
                                finished_regions.append(filtered)
                                finished_coords.append(rct)
                            else:
                                new_coords.append(rct)
                                new_regions.append(filtered)
                        else:
                            new_coords.append(rct)
                            new_regions.append(regions[i][y1:y2, x1:x2])
                        # if new_regions[-1].shape[0] != rct[3] or new_regions[-1].shape[1] != rct[2]:
                        #     print(new_regions[-1].shape, rct)
                        all_coords.append(rct)
                if len(divs) == 0:
                    # print('unable to divide')
                    finished_regions.append(regions[i])
                    finished_coords.append(coords[i])

        # print('scanned', len(regions), 'regions.', len(new_regions), 'new regions found')
        regions = new_regions
        homo = new_homo
        coords = new_coords

    return finished_regions, finished_coords


### Recursive filter
def converge(region, after_filter) -> bool:
    '''Check the regions against the convergence criteria.
    Args:
        region (cv2 image): region before operation
        after_filter (cv2 image): region after operation
    Returns:
        bool: True if the algorithm converged for this region
    '''
    Su = np.sum(region)
    Sv = np.sum(after_filter)
    return Su == Sv or Sv == 0


def compute_k(omega: np.ndarray) -> float:
    '''Calculate the k-value for each omega list.
    Args:
        omega (np.ndarray): array of widths, heights or areas of the CCs in the region

    Returns:
        float: the k calculated by the formula defined in (Tran et al. 2016)
    '''
    return max(np.mean(omega) / np.median(omega), np.median(omega) / np.mean(omega))


def compute_suspected_max(omega: np.ndarray, k: float) -> np.ndarray:
    '''Find the suspected non-text elements by the maximum-median filter.
    Args:
        omega (np.ndarray): array of widths, heights or areas of the CCs in the region
        k (float): the k calculated by the formula defined in (Tran et al. 2016)

    Returns:
        np.array: boolean mask for the suspected non-text elements
    '''
    return (omega == np.max(omega)) & (omega > k * np.median(omega))


def compute_suspected_min(omega, k):
    '''Find the suspected non-text elements by the minimum-median filter.
    Args:
        omega (np.ndarray): array of widths, heights or areas of the CCs in the region
        k (float): the k calculated by the formula defined in (Tran et al. 2016)

    Returns:
        np.array: boolean mask for the suspected non-text elements
    '''
    return (omega == np.min(omega)) & (omega < np.median(omega) / k)


def is_in_range(v: np.ndarray, start: int, end: int) -> np.ndarray:
    '''Check if the elements in a vector lie in an interval.
    Args:
        v (np.ndarray): vector of CCs to check
        start (int): start of the interval
        end (int): end of the interval

    Returns:
        np.ndarray: boolean mask for the CCs that are in the range
    '''
    return (v > start) & (v < end)


def get_neigh(CCu: np.ndarray) -> 'tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]':
    '''Find the neighbouring CCs for each CC.
    Use the method described by (Chen et al. 2013) to calculate the neighbours of a CC.
    Args:
        CCu (np.ndarray): all the CCs to use in the analysis

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: LNN (left nearest
        neighbour), RNN (right nearest neighbour), LNWS (left nearest white
        space) and RNWS (right nearest whitespace).
    '''
    lnn = np.zeros(CCu.shape[0])
    rnn = np.zeros(CCu.shape[0])
    lnws = np.zeros(CCu.shape[0])
    rnws = np.zeros(CCu.shape[0])

    for i in range(CCu.shape[0]):
        CCi = CCu[i]
        overlap1 = is_in_range(CCu[:, 1], CCi[1], CCi[1] + CCi[3])
        overlap2 = is_in_range(CCu[:, 1] + CCu[:, 3], CCi[1], CCi[1] + CCi[3])
        overlap3 = is_in_range(CCi[1], CCu[:, 1], CCu[:, 1] + CCu[:, 3])
        overlap4 = is_in_range(CCi[1] + CCi[3], CCu[:, 1], CCu[:, 1] + CCu[:, 3])
        vertical_overlap = overlap1 | overlap2 | overlap3 | overlap4
        ws_left = (CCu[:,0] + CCu[:,2]) - CCi[0]
        ws_right = CCu[:,0] - (CCi[0] + CCi[2])
        _lnn = np.argsort(ws_left)
        _rnn = np.argsort(ws_right)
        _lnn = _lnn[vertical_overlap[_lnn]]
        _rnn = _rnn[vertical_overlap[_rnn]]
        lnn[i] = _lnn[0] if _lnn.shape[0] > 0 else -1
        rnn[i] = _rnn[0] if _rnn.shape[0] > 0 else -1
        lnws[i] = ws_left[_lnn[0]] if _lnn.shape[0] > 0 else -1
        rnws[i] = ws_right[_rnn[0]] if _rnn.shape[0] > 0 else -1

    return lnn, rnn, lnws, rnws


def get_cc_in_region(region: np.ndarray, cc: np.ndarray) -> 'list[tuple[int, int, int, int]]':
    '''Find all the CCs contained in a region.
    Args:
        region (np.ndarray): bounding box of the region
        cc (np.ndarray): bounding box for all of the CCs in the image

    Returns:
        list[tuple[int, int, int, int]]: list of the bounding boxes of all the
        CCs contained in the region.
    '''
    return [(cc[i][0],cc[i][1],cc[i][2],cc[i][3], i) for i in range(cc.shape[0]) if cc[i][0] > region[0] and cc[i][0]+cc[i][2] < region[0]+region[2] and cc[i][1] > region[1] and cc[i][1]+cc[i][3] < region[1]+region[3]]


def recursive_filter(region, coords: np.ndarray, rect: np.ndarray, is_text: np.ndarray, area: np.ndarray):
    '''Apply the recursive filter to a region.
    Use the recursive filter described by (Tran et al. 2016) to eliminate
    non-text elements not caught by the heuristic filter.
    Args:
        region (cv2 image): image to apply the filter
        coords (np.ndarray): bounding box of the region
        rect (np.ndarray): bounding boxes of all the CCs
        is_text (np.ndarray): boolean mask for the text CCs
        area (np.ndarray): area (number of filled pixels) for each CC
    '''
    CCs = np.array(get_cc_in_region(coords, rect[is_text]))
    if CCs.shape[0] == 0: return
    indicies = CCs[:,-1]
    CCu = CCs[:,:-1]
    omega1 = area[is_text][indicies]  # np.array([CCi[2]*CCi[3] for CCi in CCu])
    omega2 = np.array([CCi[3] for CCi in CCu])
    omega3 = np.array([CCi[2] for CCi in CCu])

    lnn, rnn, lnws, rnws = get_neigh(CCu)

    num_ln = np.array([(lnn == i).sum() for i in range(lnn.shape[0])])
    num_rn = np.array([(rnn == i).sum() for i in range(rnn.shape[0])])
    ws = rnws[rnws > 0] if (rnws>0).any() else np.array([0])
    k1, k2, k3 = compute_k(omega1), compute_k(omega2), compute_k(omega3)
    # maximum median filter
    suspected = compute_suspected_max(omega1, k1) & (compute_suspected_max(omega2, k2) | compute_suspected_max(omega2, k3))
    lnws[lnws == -1] = 1e10
    rnws[rnws == -1] = 1e10
    mi = np.min([lnws, rnws], axis=0)
    cond1 = mi > max(np.median(ws), np.mean(ws))
    lnws[lnws == 1e10] = -1
    rnws[rnws == 1e10] = -1
    ma = np.max([lnws, rnws], axis=0)
    cond1 &= (ma == np.max(ws)) | (mi > 2 * np.mean(ws))
    cond2 = (num_ln == np.max(num_ln)) & (num_ln > 2)
    cond2 |= (num_rn == np.max(num_rn)) & (num_rn > 2)
    non_text = suspected & (cond1 | cond2)
    # minimum median filter
    suspected = compute_suspected_min(omega2, k2) | compute_suspected_min(omega3, k3)
    lnws[lnws == -1] = 1e10
    rnws[rnws == -1] = 1e10
    mi = np.min([lnws, rnws], axis=0)
    cond1 = mi > max(np.median(ws), np.mean(ws))
    non_text |= suspected & cond1
    i = 0
    for x,y,w,h in CCu[non_text]:
        x -= coords[0]
        y -= coords[1]
        cv2.rectangle(region, (x, y), (x+w, y+h), 0, -1)
        is_text[is_text][indicies[i]] = False
        i += 1


### Multi-layer classification
def multi_layer(img, rect: np.ndarray, is_text: np.ndarray, area: np.ndarray, t: float = 0):
    '''Apply the multi-layer classification to an image.
    Use the method described by (Tran et al. 2017) to eliminate further non-text
    elements.
    Args:
        img (cv2 image): image to apply the ML classification
        rect (np.ndarray): bounding boxes of all the CCs
        is_text (np.ndarray): boolean mask for the text CCs
        area (np.ndarray): area (number of filled pixels) for each CC
        t (float): the threshold of pixels to ignore

    Returns:
        cv2 image: text image after the removal of all the non-text elements
    '''
    prev = img.copy() * 0
    current = img.copy()
    i = 0
    while not converge(prev, current):
        rs = []
        cs = []
        hdivs = get_division(current, 1, int(img.shape[0] * t))
        vdivs = get_division(current, 0, int(img.shape[1] * t))
        divs = []
        for h in hdivs:
            for v in vdivs:
                x1, x2 = min(v[0], v[1]), max(v[0], v[1])
                y1, y2 = min(h[0], h[1]), max(h[0], h[1])
                divs.append((x1, x2, y1, y2))
        for x1,x2,y1,y2 in divs:
            rct = (x1, y1, x2-x1, y2-y1)
            cs.append(rct)
            rs.append(current[y1:y2, x1:x2])

        prev = current
        current = current.copy() * 0
        for i in range(len(rs)):
            recursive_filter(rs[i], cs[i], rect, is_text, area)
            x,y,w,h = cs[i]
            current[y:y+h, x:x+w] = rs[i]
            i += 1
    # print(i, 'iterations')
    return current


def segment(img_bw, temp_folder: str = None, output_path: str = None) -> 'tuple[np.ndarray, list, list[np.ndarray]]':
    '''Segment an image using an MHS based approach.
    Implements an MHS (Tran et al. 2017) based approach for document text region
    identification based on homogeneity.
    Args:
        img_bw (cv2 image): binarized image to segment
        temp_folder (str): folder to save intermediary files to, if None does not save. default=None
        output_path (str): path to the resulting image with only text elements, if None does not save. default=None

    Returns:
        tuple[np.ndarray, list, list[np.ndarray]]: the text document, a list of
        all the regions and all of their coordinates.
    '''
    _, thresh = cv2.threshold(img_bw, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    area, density, rect, inc, hw_rate = cc_analisys(thresh)
    thresh, is_text = heuristic_filter(thresh, area, density, rect, inc, hw_rate)
    conditional_save(thresh, get_conditional_path('heuristic_filter.png', temp_folder))
    # in case there is a text element that is now empty, make it non-text
    for i in range(rect.shape[0]):
        if is_text[i]:
            x,y,w,h = rect[i]
            is_text[i] = np.any(thresh[y:y+h,x:x+w] > 0)

    if temp_folder:
        img_boxes = thresh.copy()
        for r in rect[is_text]:
            x,y,w,h = r
            cv2.rectangle(img_boxes, (x,y), (x+w,y+h), 128, 2)
        conditional_save(img_boxes, get_conditional_path('text_ccs.png', temp_folder))

    # print('before:', is_text.sum())
    rs, cs = recursive_splitting(thresh, rect, is_text, area, t=0.01)
    # print('after:', is_text.sum())

    # remove empty(-ish) regions
    new_rs = [rs[i] for i in range(len(rs)) if np.sum(rs[i] > 0) / (cs[i][2]*cs[i][3]) > 0.01]
    new_cs = [cs[i] for i in range(len(rs)) if np.sum(rs[i] > 0) / (cs[i][2]*cs[i][3]) > 0.01]

    rs, cs = new_rs, new_cs
    if temp_folder:
        img_boxes = thresh.copy()
        for r in cs:
            x,y,w,h = r
            cv2.rectangle(img_boxes, (x,y), (x+w,y+h), 128, 2)
        conditional_save(img_boxes, get_conditional_path('multilevel_regions.png', temp_folder))

    img = thresh.copy() * 0
    for i in range(len(rs)):
        x,y,w,h = cs[i]
        img[y:y+h, x:x+w] = rs[i]
    conditional_save(img_boxes, get_conditional_path('multi_level.png', temp_folder))

    # remove the text CCs now empty
    CCt = np.argwhere(is_text).flatten()
    for i in CCt:
        x,y,w,h = rect[i]
        if np.sum(img[y:y+h, x:x+w] > 0) == 0:
            is_text[i] = False
    # print('before:', is_text.sum())
    img = multi_layer(img, rect, is_text, area, t=0.01)
    # print('after:', is_text.sum())
    conditional_save(img, get_conditional_path('multi_layer.png', temp_folder))

    ### Segmentation of homogeneous regions
    rs, cs = recursive_splitting(img, rect, is_text, area, t=0, do_filter=False)
    new_rs = [rs[i] for i in range(len(rs)) if np.sum(rs[i] > 0) / (cs[i][2]*cs[i][3]) > 0.01]
    new_cs = [cs[i] for i in range(len(rs)) if np.sum(rs[i] > 0) / (cs[i][2]*cs[i][3]) > 0.01]
    rs, cs = new_rs, new_cs
    if temp_folder:
        img_boxes = img.copy()
        for r in cs:
            x,y,w,h = r
            cv2.rectangle(img_boxes, (x,y), (x+w,y+h), 128, 2)
        conditional_save(img_boxes, get_conditional_path('mhs_boxes.png', temp_folder))

    conditional_save(img, output_path)
    ...
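In mhs_layout_analisys.py, is_text is not a method but a boolean mask over connected components: heuristic_filter produces it, and recursive_filter and multi_layer refine it. A minimal sketch of how that mask might be consumed outside segment() follows; the file names 'page.png' and 'text_boxes.png' and the module import path are placeholders, not part of the original source.

# Sketch: run the snippet's heuristic filter and draw the CCs kept as text.
import cv2

from mhs_layout_analisys import cc_analisys, heuristic_filter  # assumed import path

img_bw = cv2.imread('page.png', cv2.IMREAD_GRAYSCALE)
_, thresh = cv2.threshold(img_bw, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

area, density, rect, inc, hw_rate = cc_analisys(thresh)
text_only, is_text = heuristic_filter(thresh, area, density, rect, inc, hw_rate)

# rect[is_text] holds the bounding boxes of the components kept as text.
boxes = img_bw.copy()
for x, y, w, h in rect[is_text]:
    cv2.rectangle(boxes, (x, y), (x + w, y + h), 128, 2)
cv2.imwrite('text_boxes.png', boxes)

This mirrors what segment() itself does when it saves the text_ccs.png debug image.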


language.py

Source: language.py (GitHub)


"""
Copyright (c) 2017 Wind River Systems, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied.
"""
from abc import ABCMeta


class LanguageType(ABCMeta):
    def __repr__(cls):
        return cls.string_repr

    def __eq__(cls, other):
        return cls.string_repr == other.string_repr

    def __ne__(cls, other):
        return cls.string_repr != other.string_repr


class Language(object):
    """Defines set of supported file languages and their respective file extensions
    """
    class Unknown(metaclass=LanguageType):
        string_repr = "unknown"
        is_text = False
        is_binary = False
        is_source_code = False
        extensions = []

    class Binary(metaclass=LanguageType):
        string_repr = "binary"
        is_text = False
        is_binary = True
        is_source_code = False
        extensions = []

    class PlainText(metaclass=LanguageType):
        string_repr = "all"
        is_text = True
        is_binary = False
        is_source_code = False
        extensions = ["txt", "text", "xml", "html", "xsl", "xspf"]

    class C(metaclass=LanguageType):
        string_repr = "c"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["c", "cc", "cp", "cpp", "c++", "cxx", "h", "hh", "hxx", "hpp", "h++", "moc"]

    class Python(metaclass=LanguageType):
        string_repr = "python"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["py", "rpy", "pyt", "pyw", "pym", "re"]

    class Java(metaclass=LanguageType):
        string_repr = "java"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["java", "jsp", "j"]

    class Shell(metaclass=LanguageType):
        string_repr = "shell"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["sh", "csh", "ksh", "run", "bsh", "bash"]

    class Perl(metaclass=LanguageType):
        string_repr = "perl"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["pl"]

    class Javascript(metaclass=LanguageType):
        string_repr = "javascript"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["js", "javascript", "json"]

    class Scala(metaclass=LanguageType):
        string_repr = "scala"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["scala"]

    class MSDOS(metaclass=LanguageType):
        string_repr = "msdos"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["bat"]

    class Haskell(metaclass=LanguageType):
        string_repr = "haskell"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["hs", "lhs"]

    class PHP(metaclass=LanguageType):
        string_repr = "php"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["php"]

    class Patch(metaclass=LanguageType):
        string_repr = "patch"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["patch"]

    class Pascal(metaclass=LanguageType):
        string_repr = "pascal"
        is_text = True
        is_binary = False
        is_source_code = True
        extensions = ["p"]

    @staticmethod
    def language_list():
        return [getattr(Language, attr) for attr in Language.__dict__.keys()
                if type(getattr(Language, attr)) == LanguageType]

    @staticmethod
    def text_languages():
        return [str(lang) for lang in Language.language_list() if lang.is_text]

    @staticmethod
    def guess_language(file_extension):
        for lang in Language.language_list():
            if file_extension in lang.extensions:
                return lang
...
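Here is_text is a plain class attribute: each Language subclass declares whether files of that type are textual. A short usage sketch, assuming the snippet above is importable as a module named language:

# Sketch: look up a language by file extension and read its is_text flag.
from language import Language  # assumes the snippet above lives in language.py

lang = Language.guess_language('py')
print(lang)                        # "python" (via LanguageType.__repr__)
print(lang.is_text)                # True
print(Language.Binary.is_text)     # False
print(Language.text_languages())   # names of every language flagged as text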


test_trim.py

Source: test_trim.py (GitHub)


...
        self.assertEqual('\n', trim.trim(''))
        self.assertEqual('\n', trim.trim('\n'))

    def test_trim_should_leave_leading_whitespace(self):
        self.assertEqual(' abc\n', trim.trim(' abc\n'))

    def test_is_text(self):
        self.assertTrue(trim.is_text(os.path.join(ROOT_DIR, 'README.rst')))
        self.assertTrue(trim.is_text(os.path.join(ROOT_DIR, 'trim')))
        self.assertFalse(trim.is_text(sys.executable))
        self.assertFalse(trim.is_text('/bin/bash'))
        self.assertFalse(trim.is_text('/usr/bin/env'))
        self.assertFalse(trim.is_text('non_existent_file'))

    def test_is_text_should_consider_symlinks_as_non_text(self):
        self.assertFalse(trim.is_text(os.path.join(ROOT_DIR, 'trim.py')))

    def test_is_text_should_consider_whitespace_only_as_text(self):
        import tempfile
        with tempfile.NamedTemporaryFile(mode='w') as temporary_file:
            temporary_file.write(' ')
            temporary_file.flush()
            self.assertTrue(trim.is_text(temporary_file.name))

    def test_is_text_should_consider_empty_files_as_non_text(self):
        import tempfile
        with tempfile.NamedTemporaryFile(mode='w') as temporary_file:
            temporary_file.write('')
            temporary_file.flush()
            self.assertFalse(trim.is_text(temporary_file.name))

    def test_system(self):
        text = 'abc \n 1234 \n\n \n'
        import tempfile
        with tempfile.NamedTemporaryFile(delete=False,
                                         mode='w') as temporary_file:
            temporary_file.write(text)
        import subprocess
        process = subprocess.Popen([sys.executable,
                                    os.path.join(ROOT_DIR, 'trim'),
                                    temporary_file.name],
                                   stderr=subprocess.PIPE)
        process.communicate()
        self.assertEqual(0, process.returncode)
        with open(temporary_file.name) as input_file:
...
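These tests only call trim.is_text; the implementation itself is not shown on this page. A rough sketch with the same observable behaviour for the cases tested above (non-empty, decodable regular files count as text; empty files, symlinks and binaries do not) might look like the following. It is an illustration under those assumptions, not the trim project's actual code.

import os


def is_text(filename):
    """Heuristic check: a regular, non-empty, non-symlink file that decodes as UTF-8."""
    if os.path.islink(filename) or not os.path.isfile(filename):
        return False
    try:
        with open(filename, 'rb') as f:
            chunk = f.read(1024)
    except OSError:
        return False
    if not chunk:
        return False        # empty files are treated as non-text
    if b'\0' in chunk:
        return False        # NUL bytes are a strong binary indicator
    try:
        chunk.decode('utf-8')
    except UnicodeDecodeError:
        return False
    return True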

