How to use entry_href method in autotest

Best Python code snippet using autotest_python

kgcRequest.py

Source:kgcRequest.py Github

copy

Full Screen

# NOTE(review): this listing was recovered from a whitespace-collapsed page
# whose viewer line numbers were fused into the code. The line numbers were
# dropped and the indentation below was reconstructed from syntax/semantics;
# every nesting decision should be confirmed against the upstream file.
import json
import random
import re
import time
import execjs
import RequestUtil
import StrUtil
import UrlUtil


def get_page_sub_href(page):
    """Return the paper-submission URL found in a test-paper page.

    Takes the ``data`` attribute of the ``<a id="putIn">`` element in the
    formatted page text. Raises IndexError when the anchor is not found.
    """
    # Get the address used to submit the paper and return it
    return re.findall('<aid="putIn"href="javascript:void\(0\);"data="(.*?)"title=""',
                      StrUtil.formatting(page.text))[0]


class kgcRequest:
    """Client that drives the tiku.kgc.cn question-bank site through one session.

    NOTE(review): ``session``, ``urls``, ``results`` and ``kgcData`` are class
    attributes, so as written they are shared by every instance.
    """
    session = RequestUtil.session({
        "http://tiku.kgc.cn/testing/error": "课工场页面异常"
    })
    # Paths on the kgc site
    urls = UrlUtil.regionUrl("http", "tiku.kgc.cn", "80", {
        "my_exam_js": "resources/V12.0.0.5/js/myexam.js"
    })
    # Collection of request results from the kgc site
    results = {
    }
    kgcData = {
        "kgc_index_path": "",
        "entry_name": "",
        "entry_href": "",
        "get_paper_href": "",
        "100%go_on": True,
        "auto_choice_get_paper_href": True,
        "do_time": "",
        "last_data": ["", ""]
    }

    def __init__(self, kgcData: dict):
        """Copy known settings from ``kgcData``, fetch the index page and preload URLs.

        Raises RequestUtil.RequestUtilError from the ``else`` branch below.
        """
        for dataKey in self.kgcData.keys():
            try:
                self.kgcData[dataKey] = kgcData[dataKey]
            except KeyError:
                # NOTE(review): this writes "" into the caller's dict, not into
                # self.kgcData, so the stored default is left untouched -- confirm intent.
                kgcData[dataKey] = ""
        kgc_index = self.session.get(self.kgcData["kgc_index_path"])
        if kgc_index.status_code == 200:
            if '对应的passport不存在' not in kgc_index.text:
                self.urls.put("index", self.kgcData["kgc_index_path"])
                self.results[self.urls.get("index")] = kgc_index
                # Load the js
                self.load_js()
                # Load the URL used to query the answered-question count
                self.load_today_question_count_url(kgc_index)
                # Load the "my history" address
                self.load_history_href(kgc_index)
            # NOTE(review): with indentation lost, this else could pair with
            # either `if` above; it is attached to the passport check here -- confirm.
            else:
                raise RequestUtil.RequestUtilError

    def reload(self, kgc_index_path: str):
        """Store a new index path and re-run ``__init__`` with the current settings."""
        self.kgcData["kgc_index_path"] = kgc_index_path
        self.__init__(self.kgcData)

    # Load the entry; returns the name of the loaded entry
    def load_entry(self, choiceEntry=None, kgc_index_result=None):
        """Pick a practice entry from the index page and store its name and href.

        ``choiceEntry`` defaults to ``kgcData["entry_name"]`` and
        ``kgc_index_result`` to the cached index response. When no entry name
        equals ``choiceEntry`` the user is prompted on stdin until a valid
        number is entered. Returns the chosen entry's name.
        """
        if choiceEntry is None:
            choiceEntry = self.kgcData["entry_name"]
        if kgc_index_result is None:
            kgc_index_result = self.results[self.urls.get("index")]

        class entry:
            # Name and link parsed out of one entry block of the index page
            name = ""
            href = ""

            def __init__(self, entry_block):
                self.name = re.findall('<spanclass="test_list_name">(.*?)<imgsrc=', entry_block)[0]
                self.href = re.findall('<spanclass="test_list_go"><a.+href="(.*?)">进入</a>', entry_block)[0]

        print("正在加载刷题入口。。。")
        # Get all entry blocks
        entry_blocks = re.findall('<divclass="test_listitem\\d">(.*?)</div>', StrUtil.formatting(kgc_index_result.text))
        # Entry objects
        entryObjects = []
        for entryBlock in entry_blocks:
            entryObjects.append(entry(entryBlock))
            # If the address is not carried by the <a> href
            last_entryObject = entryObjects[len(entryObjects) - 1]
            if last_entryObject.href == "javascript:void(0);":
                last_entryObject.href = re.findall('<spanclass="test_list_go"><a.+data="(.*?)"', entryBlock)[0]
        # Walk the entry names looking for one equal to the requested name
        for entryObject in entryObjects:
            if entryObject.name == choiceEntry:
                self.kgcData["entry_name"] = entryObject.name
                self.kgcData["entry_href"] = entryObject.href
                return entryObject.name
        # Loop finished and nothing matched
        print("请选择入口:")
        print("序号\t入口名")
        for index in range(len(entryObjects)):
            print(str(index + 1) + "\t" + entryObjects[index].name)
        while True:
            try:
                entryObject = entryObjects[int(input("请选择:")) - 1]
                self.kgcData["entry_name"] = entryObject.name
                self.kgcData["entry_href"] = entryObject.href
                return entryObject.name
            except ValueError:
                print("请输入数字!")
            except IndexError:
                print("请输入1-" + str(len(entryObjects)) + "范围的数字!")

    # Based on the entry, load the address used to fetch a paper
    def load_get_paper_href(self):
        """Resolve and store ``kgcData["get_paper_href"]`` for the current entry.

        Requests the entry page. If that page is already a running paper (it
        contains the remaining-time marker) the entry href itself is used.
        Otherwise the page's JavaScript entry functions are extracted, rewritten
        to return their target URL, and executed through execjs; the entry is
        then chosen automatically or interactively depending on
        ``kgcData["auto_choice_get_paper_href"]``. Returns the stored href.
        """
        # Before fetching a paper, check whether an entry has been loaded
        if self.kgcData["entry_name"] == "" and self.kgcData["entry_href"] == "":
            print("错误!:未加载入口")
            self.load_entry()
        # Request the entry
        entry_result = self.session.get(self.kgcData["entry_href"])
        if entry_result.url == '':
            raise IndexError("list index out of range")
        else:
            # Does the entry go straight into a paper? (that means the mock-exam type)
            if len(re.findall('<p class="f14">考试剩余时间</p>', entry_result.text)) == 0:
                # Entry-block class
                class entryBlock:
                    # NOTE(review): enter_fun_arguments and data are mutable
                    # class-level defaults; only enter_fun_arguments is rebound per instance.
                    enter_fun_name = ""
                    enter_fun_arguments = []
                    data = []
                    a_id = 0
                    title = ""

                    def __init__(self, enter_fun_name, enter_fun_arguments, title=""):
                        self.enter_fun_name = enter_fun_name
                        self.enter_fun_arguments = enter_fun_arguments
                        self.title = title

                # Formatted entry response
                format_entry_result = StrUtil.formatting(entry_result.text)
                # Get the js calls executed by every first-level "enter test" link
                do_js_funs = re.findall('javascript:(.*?);?"', format_entry_result)
                # Get the name of the function being executed
                do_js_fun_name = re.findall('(.*?)\(', do_js_funs[0])[0]
                # For the special-skill type the match above also picks up one extra
                # call (the one that adds second-level entries), so drop the last item
                if do_js_fun_name == "percentAlert":
                    do_js_funs = do_js_funs[:-1]
                # Get the function blocks
                functions = re.findall("function\s+.+\(.+\)\{[\s\S]*?\}", entry_result.text)
                # The script block being assembled
                script = ""
                # Functions that are actually used
                use_fun = [do_js_fun_name, "percentOutlineAlert", "percentChapterAlert"]
                # Drop the functions that are not used (index/length are adjusted
                # by hand because the list is mutated while being scanned)
                index = 0
                length = len(functions)
                while index < length:
                    if re.findall('function\s*(.*?)\(', functions[index])[0] not in use_fun:
                        del functions[index]
                        index -= 1
                        length -= 1
                    index += 1
                for function in functions:
                    attr_data = re.findall('\$\("#(.*?)"\).attr\("(.*?)"\)', function)
                    if len(attr_data) > 0:
                        attr_data = attr_data[0]
                    func = function
                    if len(attr_data) > 0:
                        # Turn the jQuery attribute read into an extra parameter
                        func = function.replace("){", "," + attr_data[1] + "){"). \
                            replace('$("#' + attr_data[0] + '").attr("' + attr_data[1] + '")', "data")
                    # Make the function return the URL instead of navigating
                    func = func.replace("window.location.href=", "return ")
                    func = func.replace("var data =", "return ")
                    script += "\n" + func
                function_names = re.findall('function\s+(.*?)\(', script)
                script = execjs.compile(script)
                # Build the joined string of possible function names
                function_names_joint = ""
                for index in range(len(function_names)):
                    if index != 0:
                        function_names_joint += "|"
                    function_names_joint += function_names[index]
                # Find all test entry objects (first-level entries only at this point)
                entryBlocks = []
                for do_js_fun in do_js_funs:
                    name = re.findall('(.*?)\(', do_js_fun)[0]
                    try:
                        # NOTE(review): '[' + names + ']' builds a character class,
                        # not an alternation group -- looks unintended, confirm.
                        arguments = re.findall('[' + function_names_joint + ']\((.*?)\)', do_js_fun)[0].split(",")
                    except re.error:
                        # NOTE(review): on re.error `arguments` stays unbound (or
                        # keeps the previous iteration's value) for the line below.
                        print()
                    entryBlocks.append(entryBlock(name, arguments))
                if do_js_fun_name == "unitExam":
                    # Load data
                    data_s = re.findall('data="(.*?)"title="进入测试"', format_entry_result)
                    for index in range(len(data_s)):
                        entryBlocks[index].enter_fun_arguments.append(data_s[index])
                elif do_js_fun_name == "percentAlert":
                    # Get every a_id, data, title and percent
                    a_data_title_percent_s = re.findall(
                        ';position:relative"><aid="(.*?)"href="#"title=""class="no-sj"data="(.*?)"'
                        'style="padding-left:30px">(.*?)</a><p><spanstyle="width:(.*?)"></span>', format_entry_result)
                    # Store every a_id and data on the entry blocks
                    for index in range(len(a_data_title_percent_s)):
                        entryBlocks[index].a_id = a_data_title_percent_s[index][0]
                        entryBlocks[index].data = a_data_title_percent_s[index][1].split(",")
                        entryBlocks[index].title = a_data_title_percent_s[index][2] + "\t" + \
                            a_data_title_percent_s[index][3]
                # js code block of the second-level entry click handler
                skillList_click_js_block = ""
                # Function name of the second-level entry
                skillList_fun_name = ""

                # Load the js function that yields the second-level entry address
                def load_skill_list_click_js_block():
                    nonlocal skillList_click_js_block
                    nonlocal skillList_fun_name
                    print("正在加载专项技能型的二级入口。。。。")
                    # Grab the js code block of the second-level entry click handler
                    skillList_click_js_block = re.findall('if\s?\(pos\s?!=\s?-1\)\s?\{'
                                                          '[\s\S]*'
                                                          '\}[\s\t\n\r]*'
                                                          'obj\.toggleClass\("yes-sj"\);[\s\t\n\r]*'
                                                          '\}[\s\t\n\r]*'
                                                          '\}[\s\t\n\r]*'
                                                          '\}\);[\s\t\n\r]*'
                                                          '[\s\t\n\r]*\}[\s\t\n\r]?else[\s\t\n\r]?(\{'
                                                          '[\s\S]*'
                                                          '\})[\s\t\n\r]*'
                                                          '\}[\s\t\n\r]*'
                                                          '\$\(this\)\.toggleClass\("yes-sj"\);',
                                                          entry_result.text)[0]
                    # Second pass over the click-handler js code
                    # Turn the code block into a function
                    skillList_click_js_block = "\nfunction click(a_id,data){\r" + skillList_click_js_block[
                                                                                  1:] + "\r"
                    # Split the js code into individual lines
                    skillList_click_js_block_lines = re.findall('\n(.*?)\r', skillList_click_js_block)

                    # Find the closing partner of a bracket character in the list of lines
                    def remove_pair(char, list_, start_index):
                        # Returns a list of line indexes holding the closing
                        # character; falls off the end (returns None) when
                        # nothing is found or the start line lacks `char`.
                        char_dic = {
                            "{": "}",
                            "[": "]",
                            "<": ">",
                            "(": ")",
                        }
                        # First check that the starting line contains the character
                        if char in list_[start_index]:
                            index = start_index + 1
                            deep = 0
                            while index < len(list_):
                                if char in list_[index] and char_dic[char] not in list_[index]:
                                    deep += 1
                                # NOTE(review): conditional-expression precedence makes this
                                # `(A and B and True) if find(char) == -1 else (close < open)` -- confirm.
                                elif char_dic[char] in list_[index] and deep == 0 and True if list_[index].find(
                                        char) == -1 \
                                        else list_[index].find(char_dic[char]) < list_[index].find(char):
                                    result_index = [index]
                                    # If the line to strip also opens another pair
                                    if char in list_[index]:
                                        for result_index_ in remove_pair(char, list_, index):
                                            result_index.append(result_index_)
                                    # Return the indexes that need stripping
                                    return result_index
                                elif char_dic[char] in list_[index] and char not in list_[index]:
                                    deep -= 1
                                index += 1

                    # Strip lines that depend on jQuery so the js engine does not fail
                    skillList_click_js_block_lines_index = 0
                    skillList_click_js_block_lines_length = len(skillList_click_js_block_lines)
                    while skillList_click_js_block_lines_index < skillList_click_js_block_lines_length:
                        # If the current line contains $.ajax(
                        if "$.ajax(" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:
                            del_index = remove_pair(
                                "(", skillList_click_js_block_lines, skillList_click_js_block_lines_index)
                            for i in del_index:
                                skillList_click_js_block_lines[i] = skillList_click_js_block_lines[i].replace(
                                    ")", "")
                            # Replace $.ajax with return
                            skillList_click_js_block_lines[skillList_click_js_block_lines_index] = \
                                skillList_click_js_block_lines[skillList_click_js_block_lines_index].replace(
                                    "$.ajax(", "return")
                        # If the current line contains $
                        elif "$" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:
                            if "{" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:
                                del_index = remove_pair("{", skillList_click_js_block_lines,
                                                        skillList_click_js_block_lines_index)
                                for index in range(len(del_index)):
                                    if index < len(del_index) - 1:
                                        # Delete that line
                                        del skillList_click_js_block_lines[del_index[index]]
                                        skillList_click_js_block_lines_length -= 1
                                        skillList_click_js_block_lines_index -= 1
                                    # NOTE(review): nesting of this statement is inferred -- confirm.
                                    skillList_click_js_block_lines[del_index[index]] = \
                                        skillList_click_js_block_lines[del_index[index]].replace("}", "")
                            # Delete the current line
                            del skillList_click_js_block_lines[skillList_click_js_block_lines_index]
                            skillList_click_js_block_lines_length -= 1
                            skillList_click_js_block_lines_index -= 1
                        # If the current line is a // comment
                        elif StrUtil.formatting(
                                skillList_click_js_block_lines[
                                    skillList_click_js_block_lines_index]).startswith("//"):
                            # Delete the current line
                            del skillList_click_js_block_lines[skillList_click_js_block_lines_index]
                            skillList_click_js_block_lines_length -= 1
                            skillList_click_js_block_lines_index -= 1
                        skillList_click_js_block_lines_index += 1
                    # Collect the declared variables
                    skillList_click_js_block_variables = ["a_id", "data"]
                    for skillList_click_js_block_line in skillList_click_js_block_lines:
                        re_result = re.findall('var\s*(\w+)\s*=\s*',
                                               StrUtil.formatting(skillList_click_js_block_line))
                        if len(re_result) != 0:
                            skillList_click_js_block_variables.append(re_result[0])
                    # Remove code lines that use undeclared variables
                    skillList_click_js_block_lines_index = 0
                    skillList_click_js_block_lines_length = len(skillList_click_js_block_lines)
                    while skillList_click_js_block_lines_index < skillList_click_js_block_lines_length:
                        re_results = re.findall('(\w+)\s*[=].*;',
                                                StrUtil.formatting(skillList_click_js_block_lines[
                                                                       skillList_click_js_block_lines_index]))
                        if len(re_results) > 0:
                            for re_result in re_results:
                                if re_result not in skillList_click_js_block_variables and not re_result.startswith(
                                        "var"):
                                    # Remove the current line
                                    del skillList_click_js_block_lines[skillList_click_js_block_lines_index]
                                    skillList_click_js_block_lines_index -= 1
                                    skillList_click_js_block_lines_length -= 1
                                    break
                        re_results = re.findall('(\w+)\s*[.\[].*;',
                                                StrUtil.formatting(skillList_click_js_block_lines[
                                                                       skillList_click_js_block_lines_index]))
                        if len(re_results) > 0:
                            for re_result in re_results:
                                if re_result not in skillList_click_js_block_variables:
                                    # Remove the current line
                                    del skillList_click_js_block_lines[skillList_click_js_block_lines_index]
                                    skillList_click_js_block_lines_index -= 1
                                    skillList_click_js_block_lines_length -= 1
                        # NOTE(review): this result is never read afterwards.
                        re_results = re.findall('[+]+(\w+)\s*.*;',
                                                StrUtil.formatting(skillList_click_js_block_lines[
                                                                       skillList_click_js_block_lines_index]))
                        skillList_click_js_block_lines_index += 1
                    # Join the individual js lines back together
                    skillList_click_js_block = ""
                    for skillList_click_js_block_line in skillList_click_js_block_lines:
                        skillList_click_js_block += skillList_click_js_block_line
                    skillList_click_js_block = execjs.compile(skillList_click_js_block)
                    # Get the function name of the second-level entry
                    skillList_fun_name = re.findall("html\+='<aid=\"unitexam\"href=\"javascript:(.*?)\(",
                                                    format_entry_result)[0]

                # For an entry object, get the entry objects of its second-level entries
                def get_skill_list_entry_block_s(entry_block):
                    # Get the path used to request the second-level entries
                    url = skillList_click_js_block.call("click", entry_block.a_id,
                                                        entry_block.data)["url"]
                    # Request the second-level entries
                    skillListResult = self.session.get(url)
                    skillList = json.loads(skillListResult.text)
                    skill_list_entry_block_s = []
                    for skill in skillList:
                        # Create an entry object from each second-level entry
                        skill_list_entry_block_s.append(entryBlock(skillList_fun_name,
                                                                   [skill['percent'], entry_block.data[0], skill["id"]],
                                                                   "\t" + skill["name"] + "\t" + str(
                                                                       skill["percent"]) + "%"))
                    return skill_list_entry_block_s

                # Is the entry address chosen automatically?
                if self.kgcData["auto_choice_get_paper_href"]:
                    choose_entry_block = ""
                    # Course-review type, or special-skill type that keeps going at 100%
                    if do_js_fun_name == "unitExam" or (
                            do_js_fun_name == "percentAlert"
                            and self.kgcData["100%go_on"]):
                        choose_entry_block = entryBlocks[random.randint(0, len(entryBlocks) - 1)]
                    elif do_js_fun_name == "percentAlert" and not self.kgcData["100%go_on"]:
                        load_skill_list_click_js_block()
                        # Find the first one that is not at 100%
                        for entryBlock_ in entryBlocks:
                            if int(re.findall("\t(.*?)%", entryBlock_.title)[0]) != 100:
                                # Check whether any of this entry's second-level
                                # entries is below 100% (because kgc has a bug)
                                skill_list_entry_block_s = get_skill_list_entry_block_s(entryBlock_)
                                # Are they all at 100%?
                                is_all_100 = True
                                for skill_list_entry_block in skill_list_entry_block_s:
                                    if int(re.findall("\t.*\t(.*?)%", skill_list_entry_block.title)[0]) != 100:
                                        is_all_100 = False
                                        choose_entry_block = skill_list_entry_block
                                        break
                                # NOTE(review): nesting inferred; as placed this overwrites
                                # the second-level choice with the first-level entry -- confirm.
                                if not is_all_100:
                                    choose_entry_block = entryBlock_
                                    break
                        # If everything is at 100% (for/else: loop ended without break)
                        else:
                            choose_entry_block = entryBlocks[0]
                    try:
                        get_paper_href = script.call(choose_entry_block.enter_fun_name,
                                                     *choose_entry_block.enter_fun_arguments)
                    except AttributeError:
                        # NOTE(review): get_paper_href is unbound below if this fires.
                        print()
                    self.kgcData["get_paper_href"] = get_paper_href
                    return self.kgcData["get_paper_href"]
                # If the entry address is not chosen automatically
                else:
                    # Check whether the stored practice entry address matches one
                    if self.kgcData["get_paper_href"] != "":
                        for entryBlock_ in entryBlocks:
                            get_paper_href = script.call(entryBlock_.enter_fun_name, *entryBlock_.enter_fun_arguments)
                            if self.kgcData["get_paper_href"] == get_paper_href:
                                self.kgcData["get_paper_href"] = get_paper_href
                                return self.kgcData["get_paper_href"]
                    # Nothing matched
                    # If this is the special-skill type
                    if do_js_fun_name == "percentAlert":
                        load_skill_list_click_js_block()
                        # Request the second-level entries in a loop
                        index = 0
                        length = len(entryBlocks)
                        while index < length:
                            skill_list_entry_block_s = get_skill_list_entry_block_s(entryBlocks[index])
                            for skill_list_entry_block in skill_list_entry_block_s:
                                # Check whether the stored practice entry address matches
                                if self.kgcData["get_paper_href"] != "":
                                    get_paper_href = script.call(skill_list_entry_block.enter_fun_name,
                                                                 *skill_list_entry_block.enter_fun_arguments)
                                    if self.kgcData["get_paper_href"] == get_paper_href:
                                        self.kgcData["get_paper_href"] = get_paper_href
                                        return self.kgcData["get_paper_href"]
                                # Add the second-level entry to the entry list
                                entryBlocks.insert(index + 1, skill_list_entry_block)
                            length += len(skill_list_entry_block_s)
                            index += 1 + len(skill_list_entry_block_s)
                        print("刷题刷题入口地址失效")
                    # If this is the course-review type
                    elif do_js_fun_name == "unitExam":
                        print("刷题入口地址失效")
                        # Load the titles
                        titles = re.findall('<li><span>(.*?)</span><aid="unitexam"', format_entry_result)
                        for index in range(len(titles)):
                            entryBlocks[index].title = titles[index]
                    else:
                        raise Exception("出现未知课程!请联系作者添加!")
                    print("请选择课程:")
                    print("序号\t课程名")
                    for index in range(len(entryBlocks)):
                        print(str(index + 1) + "\t" + entryBlocks[index].title)
                    while True:
                        try:
                            case = int(input("请选择:")) - 1
                            if 0 <= case < len(entryBlocks):
                                self.kgcData["get_paper_href"] = script.call(entryBlocks[case].enter_fun_name,
                                                                             *entryBlocks[case].enter_fun_arguments)
                            else:
                                raise IndexError
                            return self.kgcData["get_paper_href"]
                        except ValueError:
                            print("请输入数字!")
                        except IndexError:
                            print("请输入1-" + str(len(entryBlocks)) + "范围的数字!")
            else:
                # The entry page is already a paper: use the entry href directly
                self.kgcData["get_paper_href"] = self.kgcData["entry_href"]
                return self.kgcData["get_paper_href"]

    # Load the paper from the fetch-paper address; returns the paper
    def load_test_paper(self, get_paper_href=None):
        """GET the paper page (default: ``kgcData["get_paper_href"]``) and cache it.

        Stores the response under ``results["test_page"]`` and returns it.
        Raises Exception when the status code is not 200.
        """
        if get_paper_href is None:
            get_paper_href = self.kgcData["get_paper_href"]
        # Check that the fetch-paper address is usable
        test_paper = self.session.get(get_paper_href)
        if test_paper.status_code != 200:
            raise Exception("获取试卷的地址失效或网络错误!")
        self.results["test_page"] = test_paper
        # Return the paper
        return test_paper
        # Add the submission address to urls
        # self.urls.put("current_sub_href", sub_href)
        # return self.kgcData["current_page_code"]

    # Load the paper's question analysis
    def load_solutions_paper(self, sub_test_paper_result):
        """Return the solutions page reached from the report page of a submitted paper.

        ``sub_test_paper_result`` is the decoded submit response; its "result"
        key must be truthy, otherwise Exception is raised.
        """
        if sub_test_paper_result["result"]:
            # Fetch the paper report page
            report_page_result = self.session.get(self.urls.get("go_back_url"))
            # Get the address of the paper analysis
            solutions_href = re.findall('>查看报告</a><ahref="(.*?)"title=""[\s\S]*>查看解析</a>',
                                        StrUtil.formatting(report_page_result.text))[0]
            return self.session.get(solutions_href)
        else:
            raise Exception("提交试卷,未知错误!")

    # Load the answers
    def load_answer(self, solutions_paper=None):
        """Parse the correct answers out of a solutions page.

        Returns one string per question: the zero-based option index
        (A -> "0"), or comma-joined indexes for a multiple-choice question.
        """
        if solutions_paper is None:
            # NOTE(review): sub_test_paper() requires question_data_s and
            # sub_data, so this default path raises TypeError as written.
            solutions_paper = self.load_solutions_paper(self.sub_test_paper())
        # Get all answer blocks
        subjects = re.findall(r'<ulclass="sec2grays">(.*?)</ul><divclass="sec3reportfont-yaheif14strong">',
                              StrUtil.formatting(solutions_paper.text))
        # Get all question ids
        # Get the correct answer of every question and fill the question info array
        correctAnswers = []
        for i, su in enumerate(subjects):
            # NOTE(review): questionAnswers is built but not used afterwards.
            questionAnswers = re.findall(
                r'<lic?l?a?s?s?=?"?g?r?e?e?n?"?><pre><span>([A-Z]):</span><imagestyle="vertical-align:middle"src="('
                r'.*?)"/></pre>',
                su)
            for index in range(len(questionAnswers)):
                questionAnswers[index] = list(questionAnswers[index])
            answer_li = re.findall(r'<liclass="green"><pre><span>[A-Z]:</span>', su)
            an = []
            # Multiple-choice case
            if len(answer_li) > 1:
                for answer in answer_li:
                    an.append(re.findall(r"<span>(.*):</span>", answer)[0])
                correctAnswers.append(an)
            # Single-choice case
            else:
                an.append(re.findall(r"<span>(.*):</span>", answer_li[0])[0])
                correctAnswers.append(an)
        # Convert the answers to 0/1/2/3/4
        answers_num_str = []
        for i in range(len(correctAnswers)):
            if len(correctAnswers[i]) == 1:
                answers_num_str.append(str(ord(correctAnswers[i][0].upper()) - 65))
            else:
                j = ""
                for anwer in correctAnswers[i]:
                    j += str(ord(anwer.upper()) - 65) + ","
                answers_num_str.append(j[:-1])
        return answers_num_str

    # Question data class
    class question_data:
        # Plain record for one question parsed from the paper page
        paper_code = ""
        question_id = ""
        sub_question_id = ""
        question_index = ""
        question_type = ""

        def __init__(self, paper_code, question_id, sub_question_id,
                     question_index, question_type):
            self.paper_code = paper_code
            self.question_id = question_id
            self.sub_question_id = sub_question_id
            self.question_index = question_index
            self.question_type = question_type

    # Load the collection of question data
    def load_question_data(self, test_paper=None):
        """Parse the paper page into ``question_data`` objects sorted by question index.

        Also copies the first two fields of the last parsed block into
        ``kgcData["last_data"]``.
        """
        if test_paper is None:
            test_paper = self.load_test_paper()
        # Get every question data block (de-duplicated through a set)
        data_s = list(set(re.findall(r'<dddata="(.*?)">', StrUtil.formatting(test_paper.text))))
        data = []
        # Convert each data block into a question data object
        for index in range(len(data_s)):
            data = data_s[index].split(",")
            data_s[index] = self.question_data(data[1], data[2], data[3], data[4], data[5])
        # Fill last_data from the parsed data
        # NOTE(review): raises IndexError when no question block matched (data is still []).
        self.kgcData["last_data"][0] = data[0]
        self.kgcData["last_data"][1] = data[1]
        # Sort (bubble sort on the numeric question index)
        for index_ in range(len(data_s) - 1):
            for index in range(len(data_s) - index_ - 1):
                if int(data_s[index].question_index) > int(data_s[index + 1].question_index):
                    temp_data = data_s[index]
                    data_s[index] = data_s[index + 1]
                    data_s[index + 1] = temp_data
        return data_s

    # Load the URL used to query the answered-question count
    def load_today_question_count_url(self, kgc_index_result=None):
        """Extract the question-count URL from the index page and register it in ``urls``.

        Raises Exception when requesting that URL does not return 200.
        """
        if kgc_index_result is None:
            kgc_index_result = self.results[self.urls.get("index")]
        try:
            today_question_count_url = \
                re.findall('\.val\(\'\'\);\$\.ajax\(\{url:"(.*?)",', StrUtil.formatting(kgc_index_result.text))[0]
        except IndexError:
            # NOTE(review): when nothing matches, today_question_count_url is
            # never bound and the next statement fails with a NameError.
            print()
        if self.session.get(today_question_count_url).status_code == 200:
            self.urls.put("today_question_count_url", today_question_count_url)
        else:
            raise Exception("网络异常")

    # Load the "my history" address
    def load_history_href(self, kgc_index_result=None):
        """Extract the history-page URL from the index page and register it in ``urls``.

        Raises IndexError when the link is not found and Exception when the
        URL does not return 200.
        """
        if kgc_index_result is None:
            kgc_index_result = self.results[self.urls.get("index")]
        history_href = \
            re.findall('<liseq="2"><ahref="(.*?)"', StrUtil.formatting(kgc_index_result.text))[0]
        if self.session.get(history_href).status_code == 200:
            self.urls.put("history_href", history_href)
        else:
            raise Exception("网络异常")

    # Load the "my history" page
    def load_history_page(self):
        """GET the history page, re-resolving its URL once if the first request is not 200."""
        history_page_result = self.session.get(self.urls.get("history_href"))
        if history_page_result.status_code != 200:
            self.load_history_href()
            history_page_result = self.session.get(self.urls.get("history_href"))
        return history_page_result

    # Get the "do it again" page for the given paper
    def get_again_answer_page(self, test_paper=None):
        """Return the "do it again" page for ``test_paper`` (default: a freshly loaded paper).

        The URL is produced by running the history page's own JavaScript
        function through execjs. Raises Exception on a non-200 response.
        """
        if test_paper is None:
            test_paper = self.load_test_paper()
        # Load my history
        history_page_result = self.load_history_page()
        # Get the name of the function that enters the "do it again" page
        again_answer_fun_name = re.findall('查看报告</a><ahref="javascript:(.*?)\(',
                                           StrUtil.formatting(history_page_result.text))[0]
        # Get the function that enters the "do it again" page
        again_answer_fun = re.findall('function ' + again_answer_fun_name + "[\s\S]+?}", history_page_result.text)[0]
        # Make it return the URL instead of navigating
        again_answer_fun = again_answer_fun.replace("location.href =", "return ")
        again_answer_fun = execjs.compile(again_answer_fun)
        # Get the paper id
        paper_id = re.findall('<aid="putIn"href="javascript:void\(0\);"'
                              'data=".*?/([0-9]*?)"title=""class="f14restacenterpater"'
                              '>我要交卷</a>',
                              StrUtil.formatting(test_paper.text))[0]
        # Get the address of the "do it again" page
        again_answer_href = again_answer_fun.call(again_answer_fun_name, *[paper_id, time.time()])
        again_answer_result = self.session.get(again_answer_href)
        if again_answer_result.status_code == 200:
            return again_answer_result
        else:
            raise Exception("网络错误!")

    # Print the answered-question count and the accuracy
    def print_today_question_count_url(self):
        """Query the question-count URL and print the totals it reports."""
        print("正在查询答题数和正确率。。。")
        today_question_count_result = self.session.get(self.urls.get("today_question_count_url"))
        if today_question_count_result.status_code != 200:
            self.load_today_question_count_url()
            today_question_count_result = self.session.get(self.urls.get("today_question_count_url"))
        AnswerNum = json.loads(today_question_count_result.text)
        if AnswerNum["result"]:
            # "msg" is split on ";" into three fields that are printed in order
            msgs = AnswerNum["msg"].split(";")
            print("累计答题数:" + msgs[0] + "\t实际答题数:" + msgs[1] + "\t正确率:" + msgs[2])
        else:
            print("今天还没有做题哟~加油!")

    # Submit the paper; returns the result decoded from json
    def sub_test_paper(self, question_data_s, sub_data, test_paper=None):
        """Post the answers (when ``sub_data`` is non-empty) and then submit the paper.

        Returns the decoded submit response and records its "gobackUrl" in
        ``urls``. Raises Exception when the response is not JSON that starts
        a value ("重复提交!") or when its "result" is falsy.
        """
        if test_paper is None:
            test_paper = self.load_test_paper()
        # Answers only need to be posted when the submit data is not empty
        if len(sub_data) > 0:
            # Get the address used to post the answers
            sub_js = re.findall("\.length;(\$\.ajax\(\{url:[\s\S]+?),",
                                StrUtil.formatting(
                                    self.results[self.urls.get("my_exam_js")]
                                    .text))[0].replace("$.ajax({url:", "return ")
            sub_js = "var lastData=" + str(self.kgcData["last_data"]) + ";" + sub_js
            sub_js = sub_js.replace("\'", "\"")
            paramQuestionId = ""
            question_data_s_len = len(question_data_s)
            for index in range(question_data_s_len):
                paramQuestionId += question_data_s[index].question_id
                if index != question_data_s_len - 1:
                    paramQuestionId += ","
            sub_js = "var paramQuestionId=\"" + paramQuestionId + "\";" + sub_js
            sub_js = "function a(){" + sub_js
            sub_js += "}"
            sub_href = execjs.compile(sub_js).call("a")
            # Post the answers
            self.session.post(sub_href, data=sub_data)
        # Get the address used to submit the paper
        sub_href = get_page_sub_href(test_paper)
        # Get the paper id and store it in the data
        self.kgcData["current_page_code"] = sub_href.split("/")[-1].split("?")[0]
        # Submit the paper
        try:
            result = json.loads(self.session.post(sub_href).text)
        except json.decoder.JSONDecodeError as e:
            if "Expecting value: line" in str(e):
                raise Exception("重复提交!")
            else:
                raise e
        if result["result"]:
            self.urls.put("go_back_url", result["gobackUrl"])
            return result
        else:
            raise Exception("提交试卷,未知错误!")

    # Do a paper with the given accuracy
    def do_test_paper(self, accuracy=1):
        """Answer one paper with roughly ``accuracy`` (0..1) correct answers.

        Submits the paper blank to obtain the solutions, deliberately spoils
        the first ``F_num`` answers, then re-opens the paper and submits the
        answers. Returns the "result" flag of the final submit response.
        """
        # Get the paper
        test_paper = self.load_test_paper()
        # The data to submit
        sub_data = {
            "psqId": [],
            "time": [],
            "uAnswers": []
        }
        # Get the question data
        question_data_s = self.load_question_data(test_paper)
        # Submit a blank paper (empty sub_data skips the answer post)
        sub_test_paper_result = self.sub_test_paper([], [], test_paper)
        # Get the paper analysis
        solutions_paper = self.load_solutions_paper(sub_test_paper_result)
        # Load the answers
        answers = self.load_answer(solutions_paper)
        # Number of questions
        question_num = len(answers)
        # Compute the number of wrong answers
        F_num = int(question_num - accuracy * question_num)
        # Write the wrong answers
        for i in range(int(F_num)):
            while True:
                # Draw a random option 0..3 until it differs from the correct answer
                ran = str(random.randint(0, 3))
                if ran != answers[i]:
                    answers[i] = ran
                    break
        # Decide the time spent on each question
        do_time = self.kgcData["do_time"]
        if do_time == "":
            do_time = str(random.randint(10, 20))
        # Fill sub_data with the question data
        for index in range(len(question_data_s)):
            sub_data["psqId"].append(question_data_s[index].sub_question_id)
            sub_data["time"].append(do_time)
            try:
                sub_data["uAnswers"].append(answers[index])
            except IndexError as e:
                # NOTE(review): a missing answer is silently skipped, leaving
                # uAnswers shorter than psqId.
                print()
        # Get the "do it again" page
        again_answer = self.get_again_answer_page(test_paper)
        # Submit the answered paper
        sub_test_paper_result = self.sub_test_paper(question_data_s, sub_data, again_answer)
        # Return whether it succeeded
        return sub_test_paper_result["result"]

    def load_my_exam_js(self):
        """GET the ``my_exam_js`` resource, cache it in ``results`` and return it."""
        self.results[self.urls.get("my_exam_js")] = \
            self.session.get(self.urls.get("my_exam_js"))
        return self.results[self.urls.get("my_exam_js")]

    def load_js(self):
        print("正在加载js")
        # NOTE(review): the listing is truncated at this point ("..." in the
        # source); the remainder of load_js is not visible.
        ...

Full Screen

Full Screen

studious.py

Source:studious.py Github

copy

Full Screen

#!/usr/bin/env python3
## studious main
import sys
import hashlib
from base32c import cb32encode
# from Moore's book
from PySide2 import QtWidgets as qtw
from PySide2 import QtGui as qtg
from PySide2 import QtCore as qtc
# from PySide2 import QtWebEngineWidgets as qtwe
import ebooklib
from ebooklib import epub
import xml.etree.ElementTree as ETree

_debug = False
_dumpHTML = False

_XHTML_NS = 'http://www.w3.org/1999/xhtml'
_XHTML_BODY = '{' + _XHTML_NS + '}body'
_DC_NS = "http://purl.org/dc/elements/1.1/"
# Directory prefix removed from hrefs and item names before they are used
# as anchor ids.
_HREF_PREFIX = 'xhtml/'
# Numeric value of QTextDocument.ImageResource, as passed to loadResource().
_IMAGE_RESOURCE = 2
# Images wider than this fraction of the browser width are scaled down.
_MAX_IMAGE_WIDTH_RATIO = 0.8
# Read size used when hashing the book file.
_HASH_CHUNK_SIZE = 65536


def unique_list(l):
    """Return the items of *l* in first-seen order with duplicates removed.

    Membership is tested with ``in`` on a list, so the items do not need
    to be hashable.
    """
    ulist = []
    for item in l:
        if item not in ulist:
            ulist.append(item)
    return ulist


def _strip_href_prefix(href):
    """Return *href* without a leading ``xhtml/`` directory component."""
    if href.startswith(_HREF_PREFIX):
        return href[len(_HREF_PREFIX):]
    return href


class EPubTextBrowser(qtw.QTextBrowser):
    """Derived QTW class for the main text view."""

    # Book whose items back the relative image URLs; set via set_epub().
    the_epub = None

    def set_epub(self, the_epub):
        """Remember the book that relative image links are resolved in."""
        self.the_epub = the_epub

    def loadResource(self, restype, url):
        """Override to load images that are within the epub.

        Relative image URLs are looked up in the current book by href and
        returned as a QImage, scaled down when wider than 80% of the
        widget. Everything else is delegated to the base class, whose
        result is returned to the caller.
        """
        if (restype == _IMAGE_RESOURCE and url.isRelative()
                and self.the_epub is not None):
            if _debug:
                print("Image resource found: ", url.toDisplayString())
            # for now, assume the href (minus a leading "../") is the
            # item's path inside the book.
            img_href = url.toDisplayString()
            if img_href.startswith("../"):
                img_href = img_href[3:]
            item = self.the_epub.get_item_with_href(img_href)
            if item:
                if _debug:
                    print("successfully loaded image of type", type(item))
                image = qtg.QImage.fromData(item.get_content())
                # scaledToWidth() takes an int; passing a float is
                # rejected by newer Python/PySide2 combinations.
                max_width = int(self.width() * _MAX_IMAGE_WIDTH_RATIO)
                if image.width() > max_width:
                    image = image.scaledToWidth(
                        max_width,
                        mode=qtc.Qt.TransformationMode.SmoothTransformation)
                # It accepts anything as the variant! Python!
                return image
            print("image load failed:", img_href)
        # should we fetch external images? maybe not
        if _debug:
            print("loading non-image resource", url)
        # Return the base-class result instead of dropping it; otherwise
        # every non-image resource resolves to None.
        return super(EPubTextBrowser, self).loadResource(restype, url)


# Inheriting from QMainWindow broke the layouts.
# Should I make another class for the book itself?
class MainWindow(qtw.QMainWindow):
    """UI Class for the Studious epub reader"""
    SECTION = 0  # constants for the treeview
    HREF = 1

    def __init__(self):
        """GUI Layout is built here."""
        super(MainWindow, self).__init__()
        central = qtw.QWidget()
        self.setCentralWidget(central)

        self.setWindowTitle("Studious Reader")
        self.resize(960, 600)
        self._build_menu()
        self._build_panes(central)
        self.show()

    def _build_menu(self):
        """Create the File menu with its Open action (Ctrl+O)."""
        open_icon = self.style().standardIcon(
            qtw.QStyle.SP_DialogOpenButton)
        open_action = qtw.QAction(open_icon, '&Open', self)
        open_action.setShortcut(qtg.QKeySequence("Ctrl+o"))
        open_action.triggered.connect(self.open_new_file)
        menu_bar = qtw.QMenuBar(self)
        file_menu = menu_bar.addMenu("&File")
        file_menu.addAction(open_action)
        self.setMenuBar(menu_bar)

    def _build_panes(self, central):
        """Lay out the contents tree, the book text and the notes editor.

        The tricky layout to get the panes to work correctly: a splitter
        separates the contents tree from a frame whose horizontal layout
        holds the text and the notes, so there is no splitter between
        those two. (A sub-splitter on the right and three plain vboxes
        were also tried.)
        """
        top_layout = qtw.QHBoxLayout()
        central.setLayout(top_layout)

        left_layout = qtw.QVBoxLayout()
        top_layout.addLayout(left_layout)
        splitter = qtw.QSplitter(self)  # or add to top hlayout?
        left_layout.addWidget(splitter)

        self.tocPane = qtw.QTreeWidget(self)
        self.tocPane.setColumnCount(2)
        self.tocPane.setHeaderLabels(["Section", "Link"])
        if not _debug:
            # the link column is only useful when debugging
            self.tocPane.hideColumn(1)
        self.tocPane.itemClicked.connect(self.jump_to_tocitem)
        splitter.addWidget(self.tocPane)

        right_frame = qtw.QFrame(self)
        splitter.addWidget(right_frame)
        right_layout = qtw.QHBoxLayout()
        right_frame.setLayout(right_layout)

        self.mainText = EPubTextBrowser(self)
        self.mainText.document().setDefaultStyleSheet(
            'body{ margin-left: 20px; margin-right: 20px; line-height: 110% }')
        text_font = qtg.QFont('Liberation Serif', 12)
        text_font.setStyleHint(qtg.QFont.Serif)
        self.mainText.setFont(text_font)
        # Links are handled by jump_to_qurl instead of by the browser.
        self.mainText.setOpenLinks(False)
        self.mainText.anchorClicked.connect(self.jump_to_qurl)
        self.mainText.cursorPositionChanged.connect(self.update_location)
        right_layout.addWidget(self.mainText)
        # horizontal and vertical is flipped from what I thought.
        self.mainText.setFixedWidth(500)
        # this has no effect if fixedwidth is set.
        # and it doesn't stick to preferred if there's a splitter.
        self.mainText.setSizePolicy(qtw.QSizePolicy.Maximum,
                                    qtw.QSizePolicy.Preferred)

        # will need a vboxlayout if we add something below the notes.
        self.notesFrame = qtw.QTextEdit(self)
        right_layout.addWidget(self.notesFrame)

    def open_new_file(self):
        """Ask the user for an epub file and load it.

        Nothing happens when the dialog is cancelled.
        """
        file_path, _ = qtw.QFileDialog.getOpenFileName(
            self, caption="Load new ebook", filter="EPub files (*.epub)")
        if not file_path:
            # The dialog returns an empty path on cancel.
            return
        self.load_epub(file_path)

    def update_location(self):
        """Print the text cursor position whenever it changes."""
        print("Cursor position:", self.mainText.textCursor().position())

    def jump_to(self, urlStr):
        """Jump to an internal link. May refer to a separate page in the
        original epub, or to an anchor.

        When *urlStr* contains a ``#`` the part after it is used as the
        anchor name; otherwise the whole string is.
        """
        if _debug:
            print("Jumping to", urlStr)
        url_parts = urlStr.split('#')
        if len(url_parts) > 1:
            self.mainText.scrollToAnchor(url_parts[1])
            if _debug:
                print("ANCHORJUMP", url_parts[1])
        else:
            if _debug:
                print("URLJUMP:", urlStr)
            self.mainText.scrollToAnchor(urlStr)
        # Update cursor position by moving the cursor to where we are
        # and getting its location.
        # or should I do that only when they click?
        # TODO: move TOC highlight to wherever I jumped to
        # (probably with a trigger for the scroll event, because
        # it should work for scrolling too)
        top_left = self.mainText.rect().topLeft()
        self.mainText.setTextCursor(self.mainText.cursorForPosition(top_left))
        if _debug:
            print("new cursor rect position:", self.mainText.cursorRect())
            print("Cursor position:", self.mainText.textCursor().position())

    def jump_to_tocitem(self, item):
        """Jump for a click in the Contents pane."""
        self.jump_to(item.text(self.HREF))

    def jump_to_qurl(self, url):
        """Follow a clicked link if it is relative; ignore it otherwise."""
        # TODO: popup asking if want to open in browser
        if url.isRelative():
            self.jump_to(url.toString())

    def _add_toc_row(self, entry, parent):
        """Add a tree row under *parent* showing *entry*'s title and href."""
        row = qtw.QTreeWidgetItem(parent)
        row.setText(self.SECTION, entry.title)
        row.setText(self.HREF, _strip_href_prefix(entry.href))
        return row

    def process_toc(self, toc_node, treenode):
        """ Map the epub ToC structure to a Qt tree view.

        Entries that have a ``title`` attribute become leaf rows. Any
        other entry is treated as a ``(section, sub_entries)`` pair and
        is recursed into.

        Returns True when at least one leaf entry's href has no ``#``
        part, i.e. it links to a file rather than to an anchor.
        """
        filename_anchors = False
        for toc_entry in toc_node:
            if _debug:
                print(type(toc_entry))  # epub.Link or tuple
            if hasattr(toc_entry, 'title'):
                if _debug:
                    print(toc_entry.__dict__)
                self._add_toc_row(toc_entry, treenode)
                if '#' not in toc_entry.href:
                    filename_anchors = True
            else:  # it's a pair of the top level and sub-entries, so recurse
                section, sub_entries = toc_entry[0], toc_entry[1]
                if _debug:
                    print("tuple[0] is", type(section))
                    print(section.__dict__)
                section_row = self._add_toc_row(section, treenode)
                # NOTE(review): the section's own href is not checked for
                # a missing '#'; only leaf entries are. Confirm intended.
                filename_anchors |= self.process_toc(sub_entries, section_row)
                section_row.setExpanded(True)
        return filename_anchors

    def load_notes(self, bookfilename, author_last):
        """Compute and print the notes identifier for a book.

        The identifier is the first three characters of *author_last*,
        an underscore, and the base32 encoding of the first 10 bytes of
        the book file's SHA-256 digest. Nothing is loaded into the notes
        pane yet; the identifier is only printed.
        """
        hasher = hashlib.sha256()
        # The context manager closes the file even if reading fails;
        # chunked reads avoid holding the whole book in memory.
        with open(bookfilename, 'rb') as bookfile:
            for chunk in iter(lambda: bookfile.read(_HASH_CHUNK_SIZE), b''):
                hasher.update(chunk)
        b32str = cb32encode(hasher.digest()[:10])
        print("hash encode: ", author_last[:3] + '_' + b32str)

    def _merge_spine_documents(self, the_epub, filename_anchors):
        """Return one XHTML string holding every spine document's body.

        The first spine document supplies the surrounding tree. When
        *filename_anchors* is true, each document's content is wrapped in
        a ``<div>`` whose id is the item's file name, so links that name
        only a file have an anchor to land on.
        """
        # suppress "html" namespace prefix.
        ETree.register_namespace('', _XHTML_NS)
        # TODO: loop to get the first item that's linear.
        first_item = the_epub.get_item_with_id(the_epub.spine[0][0])
        doc_tree = ETree.fromstring(first_item.get_content().decode('utf-8'))
        doc_body = doc_tree.find(_XHTML_BODY)
        # The loop below re-adds every spine item, the first included, so
        # empty the template body first. Otherwise the first document's
        # content would appear twice.
        for child in list(doc_body):
            doc_body.remove(child)

        for uid, _linear in the_epub.spine:
            # TODO: if it's not linear, put it at the end.
            if _debug:
                print("LOADITEM:", uid)
            the_item = the_epub.get_item_with_id(uid)
            tree = ETree.fromstring(the_item.get_content().decode('utf-8'))
            body = tree.find(_XHTML_BODY)
            if filename_anchors:
                # create div elements with the name corresponding to the file
                target = ETree.Element(
                    'div', {'id': _strip_href_prefix(the_item.get_name())})
                if _debug:
                    print("ANCHOR ADDED:", list(target.items()))
                doc_body.append(target)
            else:
                target = doc_body
            target.extend(list(body))
        return ETree.tostring(doc_tree, encoding='unicode')

    def load_epub(self, filename):
        """Load epub content, and also call out to generate the TOC and
        load the notes file.

        The book's spine documents are merged into one HTML document and
        shown in the text pane. The last word of the first ``dc:creator``
        entry, lower-cased, is passed to load_notes(); "unknown" is used
        when the book has no usable creator entry.
        """
        the_epub = epub.read_epub(filename)
        self.mainText.set_epub(the_epub)  # have to set early
        if _debug:
            print(the_epub.spine)
        self.tocPane.clear()
        filename_anchors = self.process_toc(the_epub.toc, self.tocPane)
        if filename_anchors:
            print("epub has toc links with filename only")

        fulltext = self._merge_spine_documents(the_epub, filename_anchors)
        self.mainText.setHtml(fulltext)
        if _dumpHTML:
            print(fulltext)
        # TODO: have spinny until finished loading, so it won't be
        # unresponsive (see the Bible)
        # this lies, it says before finished loading images
        print("load finished.")

        creators = the_epub.get_metadata(_DC_NS, "creator")
        # Guard against books without a creator entry (or with an empty
        # one) instead of raising IndexError.
        name_parts = creators[0][0].split() if creators and creators[0][0] else []
        author_last = name_parts[-1].lower() if name_parts else "unknown"
        print(author_last)
        self.load_notes(filename, author_last)


# if __name__ == "__main__":
def main():
    """Create the application and main window.

    The book named by the first command-line argument, if any, is loaded.
    """
    app = qtw.QApplication(sys.argv)
    window = MainWindow()
    if len(sys.argv) > 1:
        bookFilename = sys.argv[1]
        window.load_epub(bookFilename)
    # this trick passes the app's exit code back to the OS....
    # NOTE(review): the listing is cut off here; the statement that starts
    # the event loop is not visible and has not been reconstructed.

Full Screen

Full Screen

BraveNewWordsScraper.py

Source:BraveNewWordsScraper.py Github

copy

Full Screen

#!/usr/bin/env python
# coding: utf-8
# In[85]:
import requests
from requests_html import HTMLSession
import re
import csv

# A four-digit number starting with 1 or 2. The original class "[1,2]"
# also accepted a literal comma, so text such as "1,234" yielded ",234".
_YEAR_PATTERN = re.compile(r"[12][0-9]{3}")


def findYear(text):
    """Return the first year-like number in *text*, or None.

    A year is four consecutive digits whose first digit is 1 or 2.
    """
    match = _YEAR_PATTERN.search(text)
    if match:
        return match.group()
    return None


def scraper():
    """Scrape entry names and first-citation years from Oxford Reference.

    Browse pages 1-9 of the listing are fetched with up to 100 entries
    each. For every entry the first quotation paragraph of its own page
    is searched for a year.

    Returns a dict mapping entry name to the year as a string; entries
    with no year found are left out.
    """
    url_A = "http://www.oxfordreference.com/browse?btog=chap&page="
    url_B = "&pageSize=100&sort=titlesort&source=%2F10.1093%2Facref%2F9780195305678.001.0001%2Facref-9780195305678"
    base_selector = "#abstract_link"
    quote_selector = ".prosequoteType > p:nth-child(1)"
    entry_dict = {}
    # One session for all requests, closed on exit, instead of a new
    # never-closed HTMLSession per request.
    with HTMLSession() as session:
        for page_num in range(1, 10):
            url = url_A + str(page_num) + url_B
            base_page = session.get(url)
            for j in range(1, 101):
                entry_num = j + 100 * (page_num - 1)
                try:
                    selector = base_selector + str(entry_num)
                    entry = base_page.html.find(selector)[0]
                    entry_name = entry.text
                    entry_href = entry.attrs["href"]
                    entry_url = "http://www.oxfordreference.com" + entry_href
                    entry_page = session.get(entry_url)
                    try:
                        first_year = entry_page.html.find(quote_selector)[0].text
                    except Exception as e:
                        # No quotation paragraph: report it and carry on
                        # with an empty text, which yields no year.
                        print(e)
                        print("On Entry #" + str(entry_num))
                        first_year = ""
                    first_year = findYear(first_year)
                    if first_year:
                        entry_dict[entry_name] = str(first_year)
                        print("Entry #" + str(entry_num) + ": " + entry_name)
                        print("\t year= " + str(first_year))
                except Exception as e:
                    # Deliberate best effort: any failure on an entry
                    # (typically the listing running out before 100) ends
                    # this page and moves on to the next one.
                    print(e)
                    print("Exited inner loop at Entry #" + str(entry_num) + ", Page #" + str(page_num))
                    print("Beginning Page #" + str(page_num + 1))
                    break
    return entry_dict


data_dict = scraper()
# newline='' lets the csv module control line endings; without it extra
# blank rows appear on platforms that translate "\n".
with open('braveNewWords.csv', 'w', encoding="utf-8", newline='') as f:
    writer = csv.writer(f)
    for row in data_dict.items():
        # NOTE(review): the listing is cut off here; the loop body is not
        # visible and has not been reconstructed.
        ...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run autotest automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful