How to use headers_array method in Playwright Python

Best Python code snippet using playwright-python

table_preprocess.py

Source:table_preprocess.py Github

copy

Full Screen

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Script: table preprocessing
Created on 2018-06-13
@author:David Yisun
@group:data
"""
import os
from bs4 import BeautifulSoup
import codecs
import re
import itertools
import numpy as np


def read_html():
    """
    Read every announcement HTML file and return the parsed documents.

    Files are listed from ``./data/round2_adjust/<category>/html/`` for each
    of the three hard-coded categories.

    :return: dict keyed by file name; each value is
             ``{'classify': <category>, 'h': <BeautifulSoup document>}``.
             Example: ``html_dict['100103.html']['h']`` is the parsed
             ``<html>...</html>`` document of that file.
    """
    classify = ['重大合同', '增减持', '定增']
    file_list = []
    for _classify in classify:
        path = './data/round2_adjust/{0}/html/'.format(_classify)
        files_name = os.listdir(path)
        # Later categories are prepended, as in the original listing.
        file_list = [{'file_name': i, 'path': path + i, 'classify': _classify}
                     for i in files_name] + file_list
    html_dict = {}
    for _file in file_list:
        with codecs.open(_file['path'], 'r', 'utf8') as f:
            data = f.read()
        print('read {0}'.format(_file['file_name']))
        # Strip line breaks (and the indentation between adjacent tags).
        data = re.sub('>\n* *<', '><', data)
        data = data.replace('\n', '')
        # `data` is already decoded text, so no `from_encoding` is passed:
        # BeautifulSoup ignores it for str input and emits a warning.
        _html = BeautifulSoup(data, 'lxml')
        html_dict[_file['file_name']] = {'classify': _file['classify'], 'h': _html}
    return html_dict


def get_all_tables():
    """
    Collect every ``<tbody>`` of every document for analysis.

    Rows and table bodies that contain no text nodes are removed from the
    parse tree first. Tables with more than 10 cells whose text is just
    "指" are treated as glossary tables and dropped.

    :return: list of ``(file_name, tbody_tag)`` tuples
    """
    tables_tag = []
    html_dict = read_html()
    for index in html_dict:
        print(index)
        t = html_dict[index]
        # Remove "tr" and "tbody" elements that hold no text at all.
        for tag_name in ('tr', 'tbody'):
            for j in t['h'].find_all(tag_name):
                if j.find_all(text=True) == []:
                    j.decompose()
        _tables_tag = t['h'].find_all('tbody')
        # Skip announcements without any table.
        if _tables_tag == []:
            continue
        # zip_longest pads the one-element [index] list with `index`, so this
        # pairs the file name with each of its tables.
        tables_tag = tables_tag + list(
            itertools.zip_longest([index], _tables_tag, fillvalue=index))
    # Drop glossary tables.
    tables_tag_new = []
    for t in tables_tag:
        annotation = t[1].find_all(text=re.compile('^ *指 *$'))
        if len(annotation) > 10:
            continue
        tables_tag_new.append(t)
    return tables_tag_new


def text_type(s):
    """
    Classify the data type of a cell's text.

    :param s: cell text
    :return: always ``'string'`` (no other type is detected yet)
    """
    res = 'string'
    return res


def td_processing(td):
    """
    Extract the information of a single cell.

    :param td: tag of the cell; an empty cell is filled in place with '---'
    :return: dict with
             ``td_content``  ndarray (rowspan x colspan) repeating the text,
             ``td_type``     ndarray (rowspan x colspan) repeating the type,
             ``td_colspan``  int,
             ``td_rowspan``  int
    """
    td_rowspan = 1
    td_colspan = 1
    if td.has_attr('rowspan'):
        td_rowspan = int(td['rowspan'])
    if td.has_attr('colspan'):
        td_colspan = int(td['colspan'])
    # Fill empty cells. The listing iterated over td.stripped_strings here,
    # which yields nothing for a cell without text, so the placeholder was
    # never written; assign it directly instead.
    if td.text == '':
        td.string = '---'
    td_type = np.array([[text_type(td.text)] * td_colspan] * td_rowspan).reshape(td_rowspan, td_colspan)
    td_content = np.array([[td.text] * td_colspan] * td_rowspan).reshape(td_rowspan, td_colspan)
    res = {'td_content': td_content,
           'td_type': td_type,
           'td_colspan': td_colspan,
           'td_rowspan': td_rowspan}
    return res


def tr_processing(tr):
    """
    Extract the information of a single row.

    :param tr: tag of the row
    :return: dict with the largest colspan/rowspan of the row, the number of
             cells, whether every cell spans several columns, the per-cell
             content and type arrays, and the total column width
    """
    tr_most_rowspan = 1  # largest rowspan in the row
    tr_most_colspan = 1  # largest colspan in the row
    tds = tr.find_all('td')
    n_tds = len(tds)  # number of cells in the row
    tr_content = []  # content array of each cell
    tr_type = []  # type array of each cell
    tr_cols = 0  # row width in columns
    count_tr_colspan = 0  # number of cells whose colspan is greater than 1
    for td in tds:
        data = td_processing(td)
        tr_most_colspan = max(tr_most_colspan, data['td_colspan'])
        tr_most_rowspan = max(tr_most_rowspan, data['td_rowspan'])
        tr_content.append(data['td_content'])
        tr_type.append(data['td_type'])
        tr_cols = tr_cols + data['td_colspan']
        if data['td_colspan'] > 1:
            count_tr_colspan += 1
    # NOTE(review): this is also True for a row without any <td>.
    all_has_multi_colspan = count_tr_colspan == n_tds
    res = {'tr_most_colspan': tr_most_colspan,
           'tr_most_rowspan': tr_most_rowspan,
           'n_tds': n_tds,
           'all_has_multi_colspan': all_has_multi_colspan,
           'tr_content': tr_content,
           # Added key: the listing collected tr_type but never returned it.
           'tr_type': tr_type,
           'tr_cols': tr_cols}
    return res


def find_title(tr):
    """
    Look for the table title.

    :param tr: tag of the row
    :return: the text of the cell if the row has exactly one cell,
             otherwise -1
    """
    data = tr_processing(tr)
    if data['n_tds'] == 1:
        return data['tr_content'][0][0][0]
    else:
        return -1


def check_headers(tr):
    """
    Inspect a row as a candidate table header.

    :param tr: tag of the row
    :return: dict ``{'type': str, 'headers_array': ndarray}``; the array is
             an unfilled object array of shape (largest rowspan, row width)
    """
    data = tr_processing(tr)
    headers_array = np.empty(shape=(data['tr_most_rowspan'], data['tr_cols']), dtype='object')
    res = {'type': '',
           'headers_array': headers_array}
    # --- the header is split into several independent sub-headers;
    #     not handled yet (example 10)
    if data['all_has_multi_colspan']:
        res['type'] = 'multi_sub_tables'
        return res
    # --- consecutive full-width rows; not handled yet
    if data['n_tds'] == 1:
        res['type'] = 'continous_rows'
        return res
    # --- single row, several columns: extract directly (not implemented)
    if data['tr_most_colspan'] == 1 and data['tr_most_rowspan'] == 1:
        pass
    # --- TODO: no rowspan, some cells with colspan: split the cell left and
    #     right, merge the data below it and record the colspan position
    #     (example 8)
    # --- TODO: no colspan, some cells with rowspan: split first, then merge
    # The listing fell off the end here and returned None; return the
    # (still untyped) result so every path yields the same structure.
    return res


def complete_headers(tr, headers_array):
    """
    Complete a partial header (not implemented).

    :param tr: tag of the row
    :param headers_array: header matrix built so far
    :return: None
    """
    return


def table_processing(tbody):
    """
    Process one table (work in progress: nothing is returned yet).

    :param tbody: tbody tag of the table
    :return: intended to be a standard two-dimensional table, or a marker of
             an unrecognised table type
             type: ['df', 'df_no_title', 'no_parse', 'part_df_content', 'only_one']
                df:              ndarray         fully parsed
                df_no_title:     ndarray         no title
                no_parse:        html.tbody      cannot be parsed
                part_df_content: ndarray+html.trs partially parsed
                only_one:        list            single row
    """
    trs = tbody.find_all('tr', recursive=False)
    n_row = len(trs)  # number of rows
    title = None  # table title
    headers_type = ''  # header type
    headers = []  # header
    headers_array = np.array([None])  # header matrix
    fields_type = []  # field types
    table_type = ''
    # --- fill the table row by row ---
    for i, tr in enumerate(trs):
        # --- check title ---
        if title is None:
            title = find_title(tr)
            if title != -1:  # the table carries its own title; go to the next row
                continue
        else:  # a title is already known: nested tables, not handled yet
            sub_title = find_title(tr)
            if sub_title != -1:  # nested tables present
                table_type = 'multi-tables'
        # --- check headers ---
        if headers_type == '':
            # No header yet.
            # NOTE(review): check_headers returns a dict, not a matrix, and
            # headers_type is never updated in the visible code.
            headers_array = check_headers(tr)
            continue
        if headers_type == 'part_headers':
            # Incomplete header.
            headers_array = complete_headers(tr, headers_array)


if __name__ == '__main__':
    tables = get_all_tables()
    data_list = []
    for i, t in enumerate(tables):
        text = t[0]
        print('{0}:{1}'.format(i, text))
        d = table_processing(t[1])
        if d is None:
            continue
        if d['all_has_multi_colspan'] and d['tr_most_rowspan'] == True:
            data_list.append(t)
    # NOTE: the source listing is truncated at this point.

Full Screen

Full Screen

decisionTrees.py

Source:decisionTrees.py Github

copy

Full Screen

" Created by Ecem Balıkçı on 1/11/2021 at 7:16 AM (Contact: balikci8ecem@gmail.com) "
import csv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import export_text
from sklearn.tree import DecisionTreeRegressor

# Zero-based CSV column positions, named after the arrays they feed.
AGE_COLUMN = 4
EXPERIENCE_COLUMN = 6
POWER_COLUMN = 7
SALARY_COLUMN = 8
# Number of leading rows used for training; the rest is the test set.
TRAIN_SIZE = 30

with open("team_big.csv", encoding='Latin-1', newline='') as f:
    reader = csv.reader(f)
    # The first row is the header. Blank rows and repeated header rows are
    # not data and are skipped.
    header = next(reader)
    rows = [row for row in reader if row and row != header]

headers_arr = np.array(header)
# Feature names in the same order as the columns of X below.
headers_array = headers_arr[[AGE_COLUMN, EXPERIENCE_COLUMN, POWER_COLUMN]]

# Build each column in one pass instead of growing arrays with np.append,
# which copies the whole array on every call.
age_list = np.array([int(row[AGE_COLUMN]) for row in rows], dtype=float)
exp_list = np.array([int(row[EXPERIENCE_COLUMN]) for row in rows], dtype=float)
pow_list = np.array([float(row[POWER_COLUMN]) for row in rows], dtype=float)
salary_list = np.array([int(row[SALARY_COLUMN]) for row in rows], dtype=float)

X = np.column_stack((age_list, exp_list, pow_list))
y = salary_list

x_train = X[:TRAIN_SIZE]
y_train = y[:TRAIN_SIZE]
x_test = X[TRAIN_SIZE:]
y_test = y[TRAIN_SIZE:]


def fit_and_report(number, max_depth):
    """Fit one decision tree, print its report, and return it with its predictions.

    :param number: label printed in the banner ("Decision Tree <number>")
    :param max_depth: ``max_depth`` passed to ``DecisionTreeRegressor``
    :return: ``(fitted regressor, predictions for x_test)``
    """
    reg = DecisionTreeRegressor(random_state=0, max_depth=max_depth)
    reg.fit(x_train, y_train)
    predictions = reg.predict(x_test)
    mse = np.mean(np.square(predictions - y_test))
    print("☘☘☘☘☘☘☘☘☘<<<<<<<<<<Results for Decision Tree {0}>>>>>>>>>>☘☘☘☘☘☘☘☘".format(number))
    print("MSE: ", mse)
    print("The feature importances: ", reg.feature_importances_)
    print()
    # export_text takes `feature_names` and, per the original author's note,
    # did not accept the ndarray directly, hence the list.
    print(export_text(reg, feature_names=list(headers_array)))
    return reg, predictions


reg_1, y_hat = fit_and_report(1, max_depth=1)
reg_2, y_hat_2 = fit_and_report(2, max_depth=3)
reg_3, y_hat_3 = fit_and_report(3, max_depth=None)


plt.plot([1, 15000, 25000], [1, 15000, 25000], c="lavender")
plt.scatter(y_test, y_hat, c="mediumpurple")
plt.scatter(y_test, y_hat_2, c="palevioletred")
plt.scatter(y_test, y_hat_3, c="mediumturquoise")
plt.title("Decision Trees: Predictions vs. Actual Values")
plt.xlabel("Actual Salary Values for Test Data")
plt.ylabel("Salary Predictions for Test Data")
plt.legend(["No-error line", "Decision Tree 1(Max depth: 1)",
            "Decision Tree 2(Max depth: 3)", "Decision Tree 3(Max depth: None)"])
# NOTE: the source listing is truncated at this point.

Full Screen

Full Screen

httpclient.py

Source:httpclient.py Github

copy

Full Screen

#! /usr/bin/env python
# Dzmitry Kuzmitch
"""Minimal HTTP/1.1 GET client with a file-based conditional-request cache.

Usage: httpclient.py host:port/filename
"""

import sys
import socket
import struct
import random
import datetime
import time
import os.path

# Without a timeout the socket never raises socket.timeout, which would make
# the timeout handler in main() unreachable.
TIMEOUT_SECONDS = 10
HEADER_END = b'\r\n\r\n'


def parse_link(link):
    """Split ``host:port/filename`` into ``(host_header, host, port, filename)``.

    Only the first path segment after the host is used as the file name.
    Raises IndexError when the port or the file name is missing and
    ValueError when the port is not a number.
    """
    parts = link.split('/')
    host_header = parts[0]
    host_parts = host_header.split(':')
    return host_header, host_parts[0], int(host_parts[1]), parts[1]


def cache_path(filename):
    """Return the cache file name: the text before the first '.' plus '.cache'."""
    return filename.split('.')[0] + '.cache'


def format_http_date(secs):
    """Format a POSIX timestamp as an HTTP date in GMT (no line terminator)."""
    return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(secs))


def build_request(host_header, filename, last_modified=None):
    """Build the GET request text.

    Every header line is terminated by exactly one CRLF and the header
    section by one empty line. The listing terminated If-Modified-Since
    twice, which ended the header section early and left a stray CRLF.

    :param host_header: value of the Host header (``host:port``)
    :param filename: requested file, without the leading '/'
    :param last_modified: HTTP date for If-Modified-Since, or None
    """
    lines = [
        'GET /' + filename + ' HTTP/1.1',
        'Host: ' + host_header,
        # The response is read until the peer closes the connection, so ask
        # the server not to keep an HTTP/1.1 connection alive.
        'Connection: close',
    ]
    if last_modified is not None:
        lines.append('If-Modified-Since: ' + last_modified)
    return '\r\n'.join(lines) + '\r\n\r\n'


def split_response(data):
    """Split raw response bytes into ``(status, header_text, body_text)``.

    The header section ends at the first empty line rather than after a
    fixed number of lines. ``status`` is '' when the response has no
    parsable status line (for example an empty response).
    """
    head, _, body = data.partition(HEADER_END)
    header_text = head.decode('iso-8859-1')
    header_lines = header_text.splitlines()
    status_parts = header_lines[0].split(' ') if header_lines else []
    status = status_parts[1] if len(status_parts) > 1 else ''
    return status, header_text, body.decode('utf-8', errors='replace')


def fetch(server_address, message):
    """Send ``message`` to ``server_address`` and return all bytes received.

    The socket is closed on every path, including errors.
    """
    chunks = []
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(TIMEOUT_SECONDS)
        sock.connect(server_address)
        sock.sendall(message.encode())
        while True:
            buf = sock.recv(1024)
            if not buf:
                break
            chunks.append(buf)
    return b''.join(chunks)


def main(argv):
    """Request ``argv[1]`` and print the request, the headers and the content."""
    host_header, host, port, filename = parse_link(argv[1])
    cache = cache_path(filename)

    # Make the request conditional when a cached copy exists.
    last_modified = None
    if os.path.isfile(cache):
        last_modified = format_http_date(os.path.getmtime(cache))
    message = build_request(host_header, filename, last_modified)

    print("")
    print("")
    print("")
    print(message)

    try:
        data = fetch((host, port), message)
    except socket.timeout:
        print('\nRequest attempt timed out')
        return
    except OSError:
        print('\nRequest attempt timed out (with an error)')
        return

    status, headers, body = split_response(data)
    print(headers)

    content = ""
    if status == "200":
        content = body
        try:
            with open(cache, "w", encoding="utf-8") as f:
                f.write(content)
        except OSError as e:
            print('\nCould not write cache file: {0}'.format(e))
    elif status == "304":
        # Not modified: nothing is printed, as in the original listing, where
        # reading the cached copy back was disabled.
        content = ""
    elif status == "404":
        content = "404 Not Found"

    print(content)


if __name__ == '__main__':
    main(sys.argv)

Full Screen

Full Screen

Statistic.py

Source:Statistic.py Github

copy

Full Screen

import numpy as np
from scipy.stats import ttest_ind, rankdata, ranksums


def _pairwise_better(test, samples, n, alpha):
    """Run ``test`` on every ordered pair of samples and mark the winners.

    :param test: two-sample test returning ``(statistic, p_value)``
    :param samples: indexable holding one sample per algorithm
    :param n: number of algorithms
    :param alpha: significance level
    :return: (n, n) float array; entry [i, j] is 1.0 when the statistic of
             ``test(samples[i], samples[j])`` is positive and its p-value
             is at most ``alpha``, otherwise 0.0
    """
    statistic = np.zeros((n, n))
    p_value = np.zeros((n, n))
    # Compute the statistic and the p-value for all pairs.
    for i in range(n):
        for j in range(n):
            statistic[i, j], p_value[i, j] = test(samples[i], samples[j])
    # Advantage of an algorithm: positive statistic.
    advantage = statistic > 0
    # Statistically different algorithms: p-value within the level.
    significance = p_value <= alpha
    # Combine the advantage and significance matrices (NaN compares False).
    return (significance & advantage).astype(float)


def t_student(headers_array, scores, alfa=.05):
    """Pairwise independent t-tests between algorithms.

    :param headers_array: algorithm names; only its length is used
    :param scores: indexable with one sample of scores per algorithm
    :param alfa: significance level
    :return: (n, n) array, 1.0 where algorithm i is significantly better
             than algorithm j
    """
    return _pairwise_better(ttest_ind, scores, len(headers_array), alfa)


def wilcoxon(headers_array, scores, alpha=.05):
    """Pairwise rank-sum tests on the per-row ranks of the algorithms.

    :param headers_array: algorithm names; only its length is used
    :param scores: 3-D array-like; axis 0 indexes the algorithms and axis 2
                   is averaged away (the folds, per the original comment)
    :param alpha: significance level
    :return: (n, n) array, 1.0 where algorithm i ranks significantly higher
             than algorithm j
    """
    # Mean score over the folds, one row per remaining axis-1 entry.
    mean_scores = np.mean(scores, axis=2).T
    # Ranks from 1 to the number of estimators; ties get the average rank.
    ranks = np.array([rankdata(ms) for ms in mean_scores])
    # NOTE(review): the listing is cut off before this function's return
    # statement; returning the matrix mirrors t_student — confirm upstream.
    stat_better = _pairwise_better(ranksums, ranks.T, len(headers_array), alpha)
    return stat_better

Full Screen

Full Screen

Playwright tutorial

LambdaTest’s Playwright tutorial gives you a broad overview of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. It offers A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.

Chapters:

  1. What is Playwright: Playwright is comparatively new but has quickly gained popularity. Get to know the history of Playwright, along with some interesting facts about it.
  2. How To Install Playwright: Learn in detail which basic configuration and dependencies are required to install Playwright and run a test. Get step-by-step directions for installing the Playwright automation framework.
  3. Playwright Futuristic Features: Launched in 2020, Playwright quickly gained huge popularity because of helpful features such as the Playwright Test Generator and Inspector, the Playwright Reporter, and the Playwright auto-waiting mechanism. Read up on those features to master Playwright testing.
  4. What is Component Testing: Component testing in Playwright is a unique feature that allows a tester to test a single component of a web application without integrating it with other elements. Learn how to perform component testing with the Playwright automation framework.
  5. Inputs And Buttons In Playwright: Every website has Input boxes and buttons; learn about testing inputs and buttons with different scenarios and examples.
  6. Functions and Selectors in Playwright: Learn how to launch the Chromium browser with Playwright. Also, gain a better understanding of some important functions like “BrowserContext,” which allows you to run multiple browser sessions, and “newPage” which interacts with a page.
  7. Handling Alerts and Dropdowns in Playwright: Playwright interacts with different types of alerts and pop-ups, such as simple, confirmation, and prompt, and with different types of dropdowns, such as single-select and multi-select. Get hands-on with handling alerts and dropdowns in Playwright testing.
  8. Playwright vs Puppeteer: Learn the differences between the two testing frameworks: how they differ from one another, which browsers they support, and what features they provide.
  9. Run Playwright Tests on LambdaTest: Playwright testing with LambdaTest maximizes test performance. You can run multiple Playwright tests in parallel on the LambdaTest test cloud. Get a step-by-step guide to running your Playwright tests on the LambdaTest platform.
  10. Playwright Python Tutorial: The Playwright automation framework supports all major languages, such as Python, JavaScript, TypeScript, and .NET. However, Python end-to-end testing with Playwright has several advantages because of the language's versatility. Get the hang of Playwright Python testing with this chapter.
  11. Playwright End To End Testing Tutorial: Get hands-on with Playwright end-to-end testing and learn to use some exciting features such as Trace Viewer, debugging, networking, component testing, visual testing, and many more.
  12. Playwright Video Tutorial: Watch video tutorials on Playwright testing from experts and get a step-by-step, in-depth explanation of Playwright automation testing.

Run Playwright Python automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

NotHelpful