How to use parse_step method in Behave

Best Python code snippet using behave

html_parser_sbi.py

Source: html_parser_sbi.py (GitHub)

copy

Full Screen

# -*- coding: utf-8 -*-
# for 2.7
#
# NOTE(review): this section was recovered from a paste in which every source
# line was fused with its line number and all indentation was lost. Nesting was
# rebuilt from the control flow; the few places where more than one nesting was
# plausible carry their own NOTE(review) and should be confirmed against the
# upstream file.
#
# The code targets Python 2.7 (byte-string .decode(), dict.iteritems(), long).
# The import fallbacks below only keep the module importable where the Python 2
# module names do not exist; they do not make the parsers Python 3 compatible.

try:
    import urllib2
except ImportError:
    urllib2 = None
try:
    from HTMLParser import HTMLParser
except ImportError:
    from html.parser import HTMLParser


# @brief Remove every ',' from a string
# @param src input string
# @return string with the commas removed
def deleteComma(src):
    # str.replace already replaces every occurrence; no loop is needed.
    return src.replace(',', '')


# @brief Replace every '&nbsp' in a string with ' '
# @param src input string
# @return string after replacement
# @note Only the five characters '&nbsp' are replaced; a trailing ';' is kept
#       (so '&nbsp;' becomes ' ;'). This is the original behaviour.
def deleteNBSP(src):
    return src.replace('&nbsp', ' ')


# @brief Remove every ' ' from a string
# @param src input string
# @return string with the spaces removed
def deleteSpace(src):
    return src.replace(' ', '')


# @brief Split a string on '\n' and ' '
# @param src input string
# @return list of non-empty tokens; a token that is exactly '\r' is dropped
def splitBySPCRLF(src):
    tokens = []
    for line in src.split('\n'):
        for word in line.split(' '):
            if word and word != '\r':
                tokens.append(word)
    return tokens


# @brief Find key in a list and return its index
# @param src list to search
# @param key value to look for
# @return index of the first match, or -1 when absent
def searchTdDataTag(src, key):
    for key_inx, tag in enumerate(src):
        if tag == key:
            return key_inx
    return -1


# @brief Find key in a list of sequences and return the index of the match
# @param src list to search
# @param inx index of the field of each element that is compared
# @param key value to look for
# @return index of the first element whose field equals key, or -1
def searchTdDataTag_index(src, inx, key):
    for key_inx, elem in enumerate(src):
        if elem[inx] == key:
            return key_inx
    return -1


# @brief Convert an exchange name into the tag used for SBI
# @param str exchange name (utf-8 byte string)
# @return 'TKY', 'JNX' or '' when the name is not recognised
def getInvestimentSBITag(str):
    name = str.decode('utf-8')
    if u'東証' in name:
        return 'TKY'
    elif u'PTS' in name:
        return 'JNX'
    else:
        return ''


# @brief Wrapper class for the C++ eOrderType enumeration
class eOrderTypeEnum:
    def __init__(self):
        self.eOrderType = { '買':1, '売':2, '訂正':3, '取消':4, '返売':5, '返買':6 }
        self.eOrderType2 = { '現物買':1, '現物売':2, '信新買':1, '信新売':2, '信返売':5, '信返買':6 }

    # Get (leverage flag, eOrderType value) from an SBI trade-type string.
    # Returns (False, 0) when no known key is contained in the string.
    def getOrderTypeFromStr(self, str):
        for key, value in self.eOrderType2.iteritems():
            if key in str:
                b_leverage = not '現物' in key
                return (b_leverage, value)

        return (False, 0)

    # Get the eOrderType value for a tag (KeyError for an unknown tag).
    def getOrderType(self, tag):
        return self.eOrderType[tag]

    # Get the tag for an eOrderType value ('' when unknown).
    def getOrderTag(self, order_type):
        for key, value in self.eOrderType.iteritems():
            if value == order_type:
                return key
        return ''


# @brief Class that parses the login response
# @note For the SBI-mobile [backup] site
class LoginParserMobile(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.start_tag = ''
        #
        self.b_result = False      # whether parsing reached a verdict
        self.login_result = False  # whether the login succeeded

    def handle_starttag(self, tag, attrs):
        if not self.b_result:
            self.start_tag = tag

    def handle_endtag(self, tag):
        if not self.b_result:
            self.start_tag = ''

    def unescape(self, s):
        # Original author's note: attrs containing '&nbsp;' cause an error,
        # so the text is passed through deleteNBSP first.
        return HTMLParser.unescape(self, deleteNBSP(s))

    def handle_data(self, data):
        if not self.b_result:
            if self.start_tag == 'font':
                if u'パスワードが違います' in data.decode('utf-8'):
                    self.b_result = True
                    self.login_result = False
            elif self.start_tag == 'a':
                if u'ログアウト' in data.decode('utf-8'):
                    self.b_result = True
                    self.login_result = True


# @brief Get the login result (SBI-mobile [backup] site)
# @param html_u8 login response (html/utf-8)
# @return (parse verdict reached, login succeeded)
def responseLoginMobile(html_u8):
    parser = LoginParserMobile()
    parser.feed(html_u8)
    parser.close()

    return (parser.b_result, parser.login_result)


# @brief Class that parses the login response
# @note For the SBI-PC [main] site
class LoginParserPC(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.login_ok = False
        self.login_fail = False
        self.important_msg = False
        self.check_important_seq = 0

    def handle_starttag(self, tag, attrs):
        if self.login_ok:
            # Login already confirmed -> look for an "important notice".
            if not self.important_msg:
                # Expects <div class="title-text"><b>重要なお知らせ</b></div>
                if tag == 'div':
                    attrs = dict(attrs)
                    if 'class' in attrs:
                        if attrs['class'] == 'title-text':
                            self.check_important_seq = 1
                elif tag == 'b':
                    if self.check_important_seq == 1:
                        self.check_important_seq = 2
                else:
                    # NOTE(review): this else is attached to the tag chain
                    # (any other tag resets the sequence); it may instead have
                    # belonged to the `== 1` test above -- confirm upstream.
                    self.check_important_seq = 0
        else:
            if not self.login_fail:
                # A logout button (gif) means login success,
                # a login button (gif) means login failure.
                if tag == 'img':
                    attrs = dict(attrs)
                    if 'alt' in attrs:
                        u8_alt = attrs['alt'].decode('utf-8')
                        if u'ログアウト' == u8_alt:
                            self.login_ok = True
                        elif u'ログイン' in u8_alt:
                            self.login_fail = True
                    elif 'title' in attrs:
                        u8_title = attrs['title'].decode('utf-8')
                        if u'ログアウト' in u8_title:
                            self.login_ok = True
                        elif u'ログイン' in u8_title:
                            self.login_fail = True

    def handle_data(self, data):
        if self.check_important_seq == 2:
            self.check_important_seq = 0
            if u'重要なお知らせ' in data.decode('utf-8'):
                self.important_msg = True


# @brief Get the login result (SBI-PC [main] site)
# @param html_sjis login response (html/Shift-JIS)
# @return (parse succeeded, login succeeded, important notice present)
def responseLoginPC(html_sjis):
    parser = LoginParserPC()
    parser.feed(html_sjis.decode('cp932').encode('utf-8'))
    if parser.login_ok:
        # parse ok / login ok
        parser_result = True
        login_result = True
        important_msg = parser.important_msg
    elif parser.login_fail:
        # parse ok / login failed (uid/pwd error)
        parser_result = True
        login_result = False
        important_msg = False
    else:
        # parse failed / login failed (parse error)
        parser_result = False
        login_result = False
        important_msg = False
    parser.close()

    return (parser_result, login_result, important_msg)


# @brief Class that extracts regist_id
# @note SBI-mobile [backup] site <generic>
class RegistIDParser(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.b_ok = False     # True once the html is confirmed to be the expected page
        self.tag_now = ''     # tag of interest (for subclasses)
        self.b_start = False  # True right after a start tag (for subclasses)
        #
        self.regist_id = long(-1)

    def handle_starttag(self, tag, attrs):
        self.tag_now = tag
        self.b_start = True
        # Do nothing while the page is unconfirmed or once regist_id is known.
        if self.b_ok and self.regist_id <= 0:
            if tag == 'input':
                attrs = dict(attrs)
                if 'name' in attrs and 'value' in attrs:
                    if attrs['name'] == 'regist_id':
                        self.regist_id = long(attrs['value'])


# @brief Portfolio extraction class
# @note For the SBI-mobile [backup] site
# @note Only verified against the lumpStockEntryExecute.do response
class PortfolioParserMobile(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.now_code = 0
        self.now_tag = ''
        self.portfolio = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            attrs = dict(attrs)
            if 'href' in attrs:
                if 'ipm_product_code' in attrs['href']:
                    # The last four characters of the href are taken as the code.
                    self.now_code = int(attrs['href'][-4:])

    def handle_data(self, data):
        if not self.now_code == 0:
            self.portfolio.append([data, self.now_code])
            self.now_code = 0


# @brief Extract the portfolio (SBI-mobile [backup] site)
# @param html_u8 response containing the portfolio (html/utf-8)
# @return list of [brand name, brand code]
# @note Mainly parses the lumpStockEntryExecute.do response
def getPortfolioMobile(html_u8):
    parser = PortfolioParserMobile()
    parser.feed(html_u8)
    parser.close()
    return parser.portfolio


# @brief Portfolio extraction class
# @note For the SBI-PC [main] site / watched brands
class PortfolioParserPC(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.now_tag = ''
        self.is_portfolio = False      # True once the html is known to contain the PF
        self.is_prev_pf_table = False  # True when just before the PF table
        self.in_pf_table = False       # True once inside the PF table
        self.in_pf_row = False         # True inside one brand's row
        self.in_pf_obj = False         # True inside one cell of a brand's row
        self.is_end = False            # processing an end tag
        self.current_obj = ''          # text of the cell currently of interest
        self.pf_obj_name = []          # list of column names
        self.pf_row_inx = 0
        self.pf_obj_inx = 0
        self.portfolio = []
        self.valueunit = [ 0, 0, 0, 0, 0, 0, long(0) ]
        self.is_proc_end = False

    def handle_starttag(self, tag, attrs):
        if self.is_proc_end:
            return
        self.now_tag = tag
        self.is_end = False
        if not self.is_portfolio:
            if tag == 'select':
                attrs = dict(attrs)
                if 'name' in attrs:
                    if attrs['name'] == 'portforio_id':
                        self.is_portfolio = True
        elif not self.is_prev_pf_table:
            if tag == 'input':
                attrs = dict(attrs)
                if 'value' in attrs:
                    if u'情報更新' == attrs['value'].decode('utf-8'):
                        self.is_prev_pf_table = True
        elif not self.in_pf_table:
            if tag == 'table':
                self.in_pf_table = True
        elif not self.in_pf_row:
            if tag == 'tr':
                self.in_pf_row = True
                self.pf_obj_inx = 0
        elif not self.in_pf_obj:
            if tag == 'td':
                self.in_pf_obj = True
                self.current_obj = ''

    # True when src is non-empty and consists only of digits, ',' and '.'.
    # An empty string is rejected so that an empty cell falls back to the
    # sentinel value instead of raising in float()/long().
    def isNumber(self, src):
        if not src:
            return False
        for c in src:
            if not c.isdigit():
                if not c == ',' and not c == '.':
                    return False
        return True

    # Convert the current cell with `convert`, or return convert(fallback)
    # when the cell is not numeric.
    def _toNumber(self, convert, fallback):
        if self.isNumber(self.current_obj):
            return convert(self.current_obj)
        return convert(fallback)

    def handle_endtag(self, tag):
        if self.is_proc_end:
            return
        self.is_end = True
        if self.in_pf_row:
            if tag == 'tr':
                # end of one PF table row
                self.in_pf_row = False
                if not self.pf_row_inx == 0:  # not the column-name row
                    self.portfolio.append(self.valueunit[:])
                self.pf_row_inx += 1
            elif tag == 'td' and self.in_pf_obj:
                # end of one PF table cell
                self.in_pf_obj = False
                if self.pf_row_inx == 0:  # column-name row
                    self.pf_obj_name.append(self.current_obj)
                else:
                    obj_name = self.pf_obj_name[self.pf_obj_inx].decode('utf-8')
                    if u'銘柄' in obj_name:
                        self.valueunit[0] = int(self.current_obj)
                    elif u'現在値' in obj_name:
                        self.valueunit[1] = self._toNumber(float, -1.0)
                    elif u'出来高' in obj_name:
                        self.valueunit[6] = self._toNumber(long, 0)
                    elif u'始値' in obj_name:
                        self.valueunit[2] = self._toNumber(float, -1.0)
                    elif u'高値' in obj_name:
                        self.valueunit[3] = self._toNumber(float, -1.0)
                    elif u'安値' in obj_name:
                        self.valueunit[4] = self._toNumber(float, -1.0)
                    elif u'前日終値' in obj_name:
                        self.valueunit[5] = self._toNumber(float, -1.0)
                self.pf_obj_inx += 1
        elif self.in_pf_table:
            if tag == 'table':
                self.in_pf_table = False
                self.is_proc_end = True

    def handle_data(self, data):
        if self.is_proc_end:
            return
        if self.in_pf_obj and not self.is_end:
            # Only the first text chunk of a cell is kept.
            if not self.current_obj:
                self.current_obj = deleteComma(data)


# @brief Extract the portfolio (SBI-PC [main] site) / watched brands
# @param html_sjis response containing the portfolio (html/Shift-JIS)
# @return list of [brand code, current, open, high, low, previous close, volume]
def getPortfolioPC(html_sjis):
    parser = PortfolioParserPC()
    parser.feed(html_sjis.decode('cp932').encode('utf-8'))
    parser.close()

    return parser.portfolio


# @brief Portfolio extraction class
# @note For the SBI-PC [main] site / owned brands
class PortfolioParserPC_Owned(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.now_tag = ''
        self.b_start = False  # processing a start tag
        self.current_obj = ''
        self.tr_data = []
        self.td_data = []
        self.b_spot = False   # spot collection started
        self.b_lev = False    # leverage collection started
        self.data_tag = ''
        self.b_start_tr = False
        self.tr_inx = 0
        self.td_inx = 0
        self.td_rowspan = 0
        # parse stage
        #  0: scan for the target div tag
        #  1: wait for the div end => check its text (= success check), go to 2
        #  2: scan for the target <b> tag (locates the holdings data)
        #  3: scan for the table (a full <tr>..</tr> without one means
        #     "empty", go back to 2)
        #  4: scan for tr tags (on table end build the holdings, go to 2)
        #  5: scan for td tags (on tr end go to 4)
        #  6: wait for the td end => store its text, go to 5
        # -1: finished
        self.parse_step = 0
        self.b_result = False  # parse success
        self.spot_owned = []   # spot holdings
        self.lev_owned = []    # leveraged positions

    def handle_starttag(self, tag, attrs):
        if self.parse_step >= 0:
            self.b_start = True
            self.now_tag = tag
            if self.parse_step == 0:
                if tag == 'div':
                    attrs = dict(attrs)
                    if 'class' in attrs and attrs['class'] == 'title-text':
                        self.parse_step = 1
            elif self.parse_step == 2:
                if self.b_spot and self.b_lev:
                    self.parse_step = -1  # finished
            elif self.parse_step == 3:
                if tag == 'tr':
                    self.b_start_tr = True
                elif tag == 'table':
                    self.b_start_tr = False
                    self.tr_inx = 0
                    self.parse_step = 4
            elif self.parse_step == 4:
                if tag == 'tr':
                    self.td_inx = 0
                    self.parse_step = 5
            elif self.parse_step == 5:
                if tag == 'td':
                    attrs = dict(attrs)
                    if 'rowspan' in attrs:
                        self.td_rowspan = int(attrs['rowspan'])
                    else:
                        self.td_rowspan = 0
                    self.parse_step = 6

    def handle_data(self, data):
        if self.parse_step >= 0:
            if self.parse_step == 1 or self.parse_step == 6:
                if data:
                    self.current_obj += deleteNBSP(data)
            elif self.parse_step == 2:
                if self.now_tag == 'b':
                    u8data = data.decode('utf-8')
                    if u'株式' in u8data:
                        if u'現物' in u8data and u'一般' in u8data:
                            self.data_tag = '現物'
                            self.b_spot = True
                            self.parse_step = 3
                        # NOTE(review): nested under the u'株式' test; it may
                        # have been a sibling of it -- confirm upstream.
                        elif u'信用' in u8data:
                            self.data_tag = '信用'
                            self.b_lev = True
                            self.parse_step = 3
            self.b_start = False

    # Text of the column called `name` (looked up in the header row) in `row`.
    def _cell(self, row, name):
        return row[searchTdDataTag_index(self.tr_data[0], 0, name)][0]

    # Merge the collected table rows into spot_owned.
    # Returns False when a row has no usable brand code.
    def _collectSpot(self):
        code = 0
        for row in self.tr_data[1:]:
            code_str = self._cell(row, '銘柄(コード)')
            # An empty code cell (rowspan continuation) keeps the previous code.
            if code_str:
                code = int(code_str[0:4])
            if code == 0:
                return False
            spot_number = int(deleteComma(self._cell(row, '数量')))
            for spot_inx, owned in enumerate(self.spot_owned):
                if owned[0] == code:
                    self.spot_owned[spot_inx] = (code, owned[1] + spot_number)
                    break
            else:
                self.spot_owned.append((code, spot_number))
        return True

    # Append the collected table rows to lev_owned.
    # Returns False when a row has no usable brand code.
    def _collectLeverage(self):
        code = 0
        for row in self.tr_data[1:]:
            code_str = self._cell(row, '銘柄(コード)')
            if code_str:
                code = int(code_str[0:4])
            if code == 0:
                return False
            sell_flag = self._cell(row, '売買建') == '売建'
            self.lev_owned.append((code,
                                   int(deleteComma(self._cell(row, '数量'))),
                                   sell_flag,
                                   self._cell(row, '買付日'),
                                   float(deleteComma(self._cell(row, '建単価')))
                                   ))
        return True

    def handle_endtag(self, tag):
        if self.parse_step >= 0:
            if self.parse_step == 1:
                if tag == 'div':
                    if self.current_obj.decode('utf-8') == u'ポートフォリオ':
                        self.current_obj = ''
                        self.b_result = True
                        self.parse_step = 2
                    else:
                        self.parse_step = -1
            elif self.parse_step == 3:
                if tag == 'tr' and self.b_start_tr:
                    # The holdings section was empty: look for the next one.
                    self.b_start_tr = False
                    # Fixed: was `self.parse_step == 2`, a comparison with no
                    # effect, so the parser never returned to stage 2.
                    self.parse_step = 2
            elif self.parse_step == 4:
                if tag == 'table':
                    if self.data_tag == '現物':
                        if self.tr_inx > 1 and not self._collectSpot():
                            self.parse_step = -1
                            return  # error
                    elif self.data_tag == '信用':
                        if self.tr_inx > 1 and not self._collectLeverage():
                            self.parse_step = -1
                            return  # error
                    del self.tr_data[:]
                    self.parse_step = 2
            elif self.parse_step == 5:
                if tag == 'tr':
                    self.tr_data.append(self.td_data[:])
                    del self.td_data[:]
                    self.tr_inx += 1
                    self.parse_step = 4
            elif self.parse_step == 6:
                if tag == 'td':
                    if self.tr_inx > 0:
                        t_td_data = self.tr_data[self.tr_inx-1]
                        # Honour the previous row's rowspan: while the cell
                        # above still spans (>= 2), insert an empty cell here.
                        for td_inx in range(self.td_inx, len(t_td_data)):
                            t_rowspan = t_td_data[td_inx][1]
                            if t_rowspan >= 2:
                                self.td_data.append(('', t_rowspan-1))
                                self.td_inx += 1
                            else:
                                break
                    # Insert into the first column not covered by a rowspan.
                    self.td_data.append((self.current_obj, self.td_rowspan))
                    self.td_inx += 1
                    self.parse_step = 5
                    self.current_obj = ''


# @brief Extract the portfolio (SBI-PC [main] site) / owned brands
# @param html_sjis response containing the portfolio (html/Shift-JIS)
# @return (parse success,
#          list of (brand code, quantity),
#          list of (brand code, quantity, sell flag, date, unit price))
def getPortfolioPC_Owned(html_sjis):
    parser = PortfolioParserPC_Owned()
    parser.feed(html_sjis.decode('cp932').encode('utf-8'))
    parser.close()

    return (parser.b_result,
            parser.spot_owned,
            parser.lev_owned)


# @brief Extract regist_id from the portfolio registration confirmation
#        response (SBI-mobile [backup] site)
# @param html_u8 lumpStockEntryConfirm response (html/utf-8)
# @return regist_id
def getPortfolioRegistID(html_u8):
    parser = RegistIDParser()
    parser.b_ok = True  # set to True without checking the page
    parser.feed(html_u8)
    regist_id = parser.regist_id
    parser.close()

    return regist_id


# @brief Result of fetching the portfolio transmission top page
#        (SBI-PC [main] site)
class EntranceOfPortfolioTransmissionParser(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.b_success = False

    def handle_starttag(self, tag, attrs):
        if not self.b_success:
            # Success when the "use the add/replace function of the
            # registered brand list" button (gif) is present.
            if tag == 'img':
                attrs = dict(attrs)
                if 'alt' in attrs:
                    if u'登録銘柄リストの追加・置き換え機能' in attrs['alt'].decode('utf-8'):
                        self.b_success = True


# @param html_sjis response (html/Shift-JIS)
# @return True on success
def responseGetEntranceOfPortfolioTransmission(html_sjis):
    parser = EntranceOfPortfolioTransmissionParser()
    parser.feed(html_sjis.decode('cp932').encode('utf-8'))
    parser.close()

    return parser.b_success


# @brief Result of a portfolio transmission request (SBI-PC [main] site)
class ReqPortfolioTransmissionParser(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.now_tag = ''
        self.b_start = False
        self.b_success = False

    def handle_starttag(self, tag, attrs):
        self.b_start = True
        if not self.b_success:
            self.now_tag = tag

    def handle_data(self, data):
        # Only the first text chunk after a start tag is examined.
        if not self.b_success and self.b_start:
            if self.now_tag == 'p':
                if u'送信指示予約を受付ました' in data.decode('utf-8'):
                    self.b_success = True
        self.b_start = False


# @param html_sjis response (html/Shift-JIS)
# @return True on success
def responseReqPortfolioTransmission(html_sjis):
    parser = ReqPortfolioTransmissionParser()
    parser.feed(html_sjis.decode('cp932').encode('utf-8'))
    parser.close()

    return parser.b_success


# @brief Class that extracts today's execution history
# @note For the SBI-PC [main] site
# @note exec_info entries are tuples of
#       (order number (display), order type (eOrderType), exchange tag str,
#        brand code, leverage flag, completed flag,
#        list of (execution date-time str, shares, unit price))
class TodayExecInfoParser(HTMLParser):

    def __init__(self):
        HTMLParser.__init__(self)
        self.now_tag = ''
        self.b_start = False  # processing a start tag
        self.current_obj = ''
        self.table_count = 0
        self.header_tag = []
        self.data_tag = []
        self.header_work = []
        self.data_work = []
        self.data_list_work = []
        self.b_read_header = False
        self.tr_inx = 0
        self.td_inx = 0
        # parse stage
        #  0: scan for the target div tag
        #  1: wait for the div end => check its text (= success check), go to 2
        #  2: scan tables (go to 3 at the data body [10th table])
        #  3: scan for tr tags (tags = 4 tr, data = header <2 tr> + data
        #     <N * 1 tr>; finished [-1] on table end)
        #  4: scan for td tags (on tr end go to 3)
        #  5: wait for the td end => store its text, go to 4
        # -1: finished
        self.parse_step = 0
        self.b_result = False  # parse success
        self.exec_info = []

    # Value of the order-header column called `name`.
    def _header(self, name):
        return self.header_work[searchTdDataTag(self.header_tag, name)]

    # Value of the execution-row column called `name`.
    def _data(self, name):
        return self.data_work[searchTdDataTag(self.data_tag, name)]

    # Flush the order collected so far into exec_info (only when it has at
    # least one execution row) and clear the per-order work buffers.
    def OutputWork(self):
        if self.b_read_header:
            if len(self.header_tag) == len(self.header_work) and self.data_list_work:
                ret_func = eOrderTypeEnum().getOrderTypeFromStr(self._header('取引預り/手数料'))
                b_complete = '完了' in self._header('発注状況')
                brand_list = splitBySPCRLF(self._header('銘柄コード市場'))
                self.exec_info.append((int(self._header('注文番号')),
                                       ret_func[1],
                                       getInvestimentSBITag(brand_list[2]),
                                       int(brand_list[1]),
                                       ret_func[0],
                                       b_complete,
                                       self.data_list_work[:]))
            del self.header_work[:]
            del self.data_list_work[:]

    # Convert data_work into one (date-time, shares, unit price) tuple.
    # Returns False (leaving data_work untouched) when the date-time cell is
    # not exactly 13 characters ('MM/DDHH:MM:SS').
    def AppendDataWorkToDataListWork(self):
        stamp = self._data('約定日時')
        if not len(stamp) == 13:
            return False
        stamp = stamp[0:5] + ' ' + stamp[5:]  # space between MM/DD and HH:MM:SS
        self.data_list_work.append((stamp,
                                    int(deleteComma(self._data('約定株数'))),
                                    float(deleteComma(self._data('約定単価')))))
        del self.data_work[:]
        return True

    def handle_starttag(self, tag, attrs):
        if self.parse_step >= 0:
            self.b_start = True
            self.now_tag = tag
            if self.parse_step == 0:
                if tag == 'div':
                    attrs = dict(attrs)
                    if 'class' in attrs and attrs['class'] == 'title-text':
                        self.parse_step = 1
            elif self.parse_step == 2:
                if tag == 'table':
                    self.table_count += 1
                    if self.table_count == 10:
                        self.parse_step = 3
            elif self.parse_step == 3:
                if tag == 'tr':
                    self.parse_step = 4
            elif self.parse_step == 4:
                if tag == 'td':
                    self.parse_step = 5

    def handle_data(self, data):
        if self.parse_step >= 0:
            if self.parse_step == 1 or self.parse_step == 5:
                if data:
                    self.current_obj += data
            elif self.parse_step == 2:
                if self.now_tag == 'div' or self.now_tag == 'b' or self.now_tag == 'font':
                    if u'お客様の株式注文はございません' in data.decode('utf-8'):
                        self.parse_step = -1
            self.b_start = False

    def handle_endtag(self, tag):
        if self.parse_step >= 0:
            if self.parse_step == 1:
                if tag == 'div':
                    if self.current_obj.decode('utf-8') == u'注文一覧':
                        self.current_obj = ''
                        self.b_result = True
                        self.parse_step = 2
                    else:
                        self.parse_step = -1
            elif self.parse_step == 3:
                if tag == 'table':
                    self.OutputWork()
                    # Fixed: was `self.parse_step == -1`, a comparison with no
                    # effect, so parsing continued into the following tables.
                    self.parse_step = -1  # finished
            elif self.parse_step == 4:
                if tag == 'tr':
                    self.td_inx = 0
                    self.parse_step = 3
                    if not self.b_read_header:
                        if self.tr_inx == 3:
                            self.b_read_header = True
                            self.tr_inx = 0
                        else:
                            self.tr_inx += 1
                    elif self.tr_inx >= 2:
                        if len(self.data_tag) == len(self.data_work):
                            self.AppendDataWorkToDataListWork()
                        else:
                            self.parse_step = -1  # error (tags and data disagree)
                        self.tr_inx += 1
                    else:
                        self.tr_inx += 1
            elif self.parse_step == 5:
                if tag == 'td':
                    if not self.b_read_header:
                        if self.tr_inx == 0 or self.tr_inx == 1:
                            self.header_tag.append(deleteNBSP(self.current_obj))
                        elif self.tr_inx == 3:
                            self.data_tag.append(deleteNBSP(self.current_obj))
                    else:
                        if self.td_inx == 0 and self.tr_inx >= 2:
                            # Is this still an "execution data" row? If not, a
                            # new order starts: flush and restart at row 0.
                            if not self.current_obj.decode('utf-8') == u'約定':
                                self.tr_inx = 0
                                self.OutputWork()
                        if self.tr_inx == 0 or self.tr_inx == 1:
                            self.header_work.append(self.current_obj)
                        else:
                            self.data_work.append(deleteNBSP(self.current_obj))
                    self.td_inx += 1
                    self.current_obj = ''
                    self.parse_step = 4


# @brief Extract the execution history from today's order list
#        (SBI-PC [main] site)
# @param html_sjis today's order list response (html/Shift-JIS)
# @return (parse success, exec_info list; see TodayExecInfoParser)
def getTodayExecInfo(html_sjis):
    parser = TodayExecInfoParser()
    parser.feed(html_sjis.decode('cp932').encode('utf-8'))
    parser.close()

    return (parser.b_result, parser.exec_info)


# @brief Class that extracts the buying power
# @note 
SBI-mobile[バックアップ]サイト<汎用>822class MarginMobileParser(HTMLParser):823 824 def __init__(self):825 HTMLParser.__init__(self)826 self.b_result = False827 self.tag_now = ''828 self.b_start = False829 830 def handle_starttag(self, tag, attrs):831 if not self.b_result:832 self.tag_now = tag833 self.b_start = True834 835 def handle_data(self, data):836 if not self.b_result:837 if self.b_start:838 if self.tag_now == 'title':839 if u'信用建余力' in data.decode('utf-8'):840 self.b_result = True841# @brief 余力を切り出す(SBI-mobile[バックアップ]サイト用)842# @param html_u8 余力画面のresponse(html/utf-8)843# @return result844def getMarginMobile(html_u8):845 parser = MarginMobileParser()846 parser.feed(html_u8)847 parser.close()848 849 return parser.b_result850# @brief regist_idを切り出すclass851# @note 買注文入力画面(SBI-mobile[バックアップ]サイト)用852class StockOrderRegistIDParser(RegistIDParser):853 854 def __init__(self, title_str, order_type):855 RegistIDParser.__init__(self)856 self.title_str = title_str857 self.order_tag = eOrderTypeEnum().getOrderTag(order_type)858 def handle_data(self, data):859 if not self.b_ok and self.b_start and self.tag_now == 'title':860 if '買' == self.order_tag or '返買' == self.order_tag:861 u8data = data.decode('utf-8')862 if self.title_str in u8data:863 # 括弧の全/半角に表記ゆれがある…864 if u'買)' in u8data or u'買)' in u8data or u'買PTS)' in u8data:865 self.b_ok = True866 elif '売' == self.order_tag or '返売' == self.order_tag:867 u8data = data.decode('utf-8')868 if self.title_str in u8data:869 # 括弧の全/半角に表記ゆれがある…870 if u'売)' in u8data or u'売)' in u8data or u'売PTS)' in u8data:871 self.b_ok = True872 elif '訂正' == self.order_tag:873 if u'注文訂正' in data.decode('utf-8'):874 self.b_ok = True875 elif '取消' == self.order_tag:876 if u'注文取消' in data.decode('utf-8'):877 self.b_ok = True878 self.b_start = False879# @brief regist_idを切り出す(SBI-mobile[バックアップ]サイト用)880# @param html_u8 buyOrderEntry/sellOrderEntryのresponse(html/utf-8)881# @return regist_id882def getStockOrderRegistID(html_u8, order_type):883 parser = 
StockOrderRegistIDParser(u'注文入力', order_type)884 parser.feed(html_u8)885 parser.close()886 return parser.regist_id887 888# @brief regist_idを切り出す(SBI-mobile[バックアップ]サイト用)889# @param html_u8 buyOrderEntryConfirm/sellOrderEntryConfirmのresponse(html/utf-8)890# @return regist_id891def getStockOrderConfirmRegistID(html_u8, order_type):892 parser = StockOrderRegistIDParser(u'注文確認', order_type)893 parser.feed(html_u8)894 parser.close()895 return parser.regist_id896# @brief 注文結果切り出しclass897# @note 注文受付画面(SBI-mobile[バックアップ]サイト)用898class StockOrderExParser(HTMLParser):899 900 def __init__(self):901 HTMLParser.__init__(self)902 self.tag_now = ''903 self.b_start = False904 self.order_type_tag = ''905 self.str_work = ''906 self.td_data = []907 self.table_count = 0908 self.parse_step = 0 #parse段階 0:対象titleタグ走査909 # 1:タイトルチェック910 # 2:対象brタグ走査911 # 3:注文成否フラグセット・注文種別セット912 # 4:対象inputタグ走査、注文番号(管理用/表示用)取得913 # 5:対象table走査914 # 6:tdタグ走査915 # + tableタグを抜けて...916 # table2個分走査し終えてたらtdデータを変換して処理完了917 # まだなら5に戻す918 # 7:tdタグ終了までのデータ取得(空でないものが見つかるまで)919 # + tdタグを抜けたら6に戻す920 # -1:完了921 self.b_result = False #注文成否922 self.b_leverage = False #信用フラグ923 self.order_type = 0 #注文種別[eOrderType]924 self.order_id = -1 #注文番号(内部値/管理用)925 self.user_order_id = -1 #注文番号(ユーザ固有/表示用)926 self.code = 0 #銘柄コード927 self.investments = '' #取引所タグ928 self.numbers = 0 #枚数929 self.value = 0.0 #価格930 931 def handle_starttag(self, tag, attrs):932 if self.parse_step >= 0:933 self.tag_now = tag934 self.b_start = True935 if self.parse_step == 0:936 if tag == 'div':937 attrs = dict(attrs)938 if 'class' in attrs and attrs['class'] == 'titletext':939 self.parse_step = 1940 elif self.parse_step == 2:941 if tag == 'br':942 self.parse_step = 3943 elif self.parse_step == 4:944 # 買/売ならばここで注文番号(管理用)を得てから次ステップ(5)へ945 if tag == 'input':946 attrs = dict(attrs)947 if 'name' in attrs and 'value' in attrs and attrs['name'] == 'orderNum':948 self.parse_step = 5949 self.order_id = int(attrs['value'])950 elif self.parse_step == 5:951 if tag == 
'table':952 if self.table_count < 2:953 self.parse_step = 6954 self.table_count += 1955 elif self.parse_step == 6:956 if tag == 'td':957 self.parse_step = 7958 self.str_work = ''959 960 def handle_data(self, data):961 if self.parse_step >= 0:962 if self.parse_step == 4:963 if self.tag_now == 'br':964 if data.isdigit():965 self.user_order_id = int(data)966 # 取消/訂正ならばここで次ステップ(5)へ進める(htmlに管理用注文番号がないので)967 if self.order_type_tag == '取消' or self.order_type_tag == '訂正':968 self.parse_step = 5969 if self.b_start:970 if self.parse_step == 1:971 if self.tag_now == 'div':972 u_str = data.decode('utf-8')973 if u'注文受付' in u_str:974 self.parse_step = 2975 if u'返済売' in u_str:976 self.order_type_tag = '返売'977 elif u'返済買' in u_str:978 self.order_type_tag = '返買'979 elif u'買' in u_str:980 self.order_type_tag = '買'981 elif u'売' in u_str:982 self.order_type_tag = '売'983 elif u'注文訂正' in u_str and u'受付' in u_str:984 self.parse_step = 2985 self.order_type_tag = '訂正'986 elif u'注文取消' in u_str and u'受付' in u_str:987 self.parse_step = 2988 self.order_type_tag = '取消'989 elif self.parse_step == 3:990 if self.tag_now == 'br':991 u_str = data.decode('utf-8')992 # ひらがな/漢字に表記ゆれがある…993 if u'注文' in u_str and (u'受付いたしました' in u_str or u'受付致しました' in u_str):994 self.parse_step = 4995 self.b_result = True996 eot = eOrderTypeEnum()997 self.order_type = eot.getOrderType(self.order_type_tag)998 elif self.parse_step == 7:999 if self.str_work == '':1000 self.str_work = data1001 self.b_start = False1002 def handle_endtag(self, tag):1003 if self.parse_step >= 0:1004 if self.parse_step == 6:1005 if tag == 'table':1006 if self.table_count < 2:1007 self.parse_step = 51008 else:1009 self.parse_step = -11010 #1011 td_data_len = len(self.td_data)1012 for inx in range(0, td_data_len):1013 # 銘柄コード[0]1014 if inx == 0:1015 self.code = int(self.td_data[inx])1016 # 取引所種別[0]1017 elif inx == 1:1018 self.investments = getInvestimentSBITag(self.td_data[inx])1019 # 2以降は偶数がタグ/奇数が値1020 elif not inx & 1:1021 u_data_tag = 
self.td_data[inx].decode('utf-8')1022 u_data_value = self.td_data[inx+1].decode('utf-8')1023 if u'株数' in u_data_tag or u'注文数' in u_data_tag:1024 if not u'注文後売却' in u_data_tag: #要らんやつ除外1025 self.numbers = int(deleteComma(u_data_value.replace(u'株', '')))1026 if u'価格' in u_data_tag:1027 if u_data_value == u'成行':1028 self.value = float(-1.0)1029 else:1030 if '/' in u_data_value:1031 # '不成/468円' みたいなの(価格指定+条件付き発注)1032 self.value = float(deleteComma(u_data_value.split('/')[1].replace(u'円', '')))1033 else:1034 self.value = float(deleteComma(u_data_value.replace(u'円', '')))1035 if u'取引' in u_data_tag:1036 if u'信用' in u_data_value:1037 self.b_leverage = True1038 else:1039 self.b_leverage = False1040 elif self.parse_step == 7:1041 if tag == 'td':1042 self.parse_step = 61043 self.td_data.append(self.str_work)1044 1045# @brief 注文結果を切り出す(SBI-mobile[バックアップ]サイト用)1046# @param html_u8 buyOrderEx.do/sellOrderEx.do/orderCancelEx.doのresponse(html/utf-8)1047# @return (解析結果, 注文番号(管理用), 注文番号(表示用), 取引所コード(str), 銘柄コード(int), 株数, 価格, 信用フラグ, eOrderType)1048def responseStockOrderExec(html_u8):1049 parser = StockOrderExParser()1050 parser.feed(html_u8)1051 parser.close()1052 1053 return (parser.b_result,1054 parser.order_id,1055 parser.user_order_id,1056 parser.investments,1057 parser.code,1058 parser.numbers,1059 parser.value,1060 parser.b_leverage,1061 parser.order_type)1062# @brief 返済建玉リスト切り出しclass1063# @note 注文受付画面(SBI-mobile[バックアップ]サイト)用1064class RepOrderTateListParser(HTMLParser):1065 1066 def __init__(self):1067 HTMLParser.__init__(self)1068 self.tag_now = ''1069 self.b_start = False1070 self.str_work = ''1071 self.tr_data = []1072 self.td_data = []1073 self.quantity = ''1074 self.table_count = 01075 self.parse_step = 0 # 0:title走査(成否チェック)1076 # 1:input走査(caIQ,code取得)1077 # 2:対象brタグ走査('一括指定は')1078 # 3:対象table走査(3個目)1079 # 4:tr走査、table終了ならデータ選別して完了(-1)1080 # 5:td走査、tr終了なら4へ1081 # 6:td終了ならdata取得して5へ、同時にinputも走査(quantityタグ取得)1082 #-1:完了1083 self.b_result = False #注文成否1084 self.code = 0 
#銘柄コード1085 self.caIQ = '' #caIQ1086 self.tatedama = [] #建玉情報list1087 1088 1089 def handle_starttag(self, tag, attrs):1090 if self.parse_step >= 0:1091 self.tag_now = tag1092 self.b_start = True1093 if self.parse_step == 1:1094 if tag == 'input':1095 attrs = dict(attrs)1096 if 'name' in attrs and 'value' in attrs:1097 if attrs['name'] == 'caIQ':1098 self.caIQ = attrs['value']1099 elif attrs['name'] == 'brand_cd':1100 self.code = int(attrs['value'])1101 if self.caIQ and self.code > 0:1102 self.parse_step = 21103 elif self.parse_step == 3:1104 if tag == 'table':1105 if self.table_count >= 2:1106 self.parse_step = 41107 self.table_count += 11108 elif self.parse_step == 4:1109 if tag == 'tr':1110 self.parse_step = 51111 self.quantity = ''1112 elif self.parse_step == 5:1113 if tag == 'td':1114 self.parse_step = 61115 self.str_work = ''1116 elif self.parse_step == 6:1117 if self.tag_now == 'input':1118 attrs = dict(attrs)1119 if 'name' in attrs and 'type' in attrs and attrs['type'] == 'text':1120 self.quantity = attrs['name']1121 1122 def handle_data(self, data):1123 if self.parse_step >= 0:1124 if self.b_start:1125 if self.parse_step == 0:1126 if self.tag_now == 'title':1127 if u'注文入力' in data.decode('utf-8') and u'信用返済' in data.decode('utf-8'):1128 self.parse_step = 11129 self.b_result = True1130 elif self.parse_step == 2:1131 if self.tag_now == 'br':1132 if u'一括指定' in data.decode('utf-8'):1133 self.parse_step = 31134 elif self.parse_step == 6:1135 if not self.str_work:1136 self.str_work = data1137 self.b_start = False1138 1139 def handle_endtag(self, tag):1140 if self.parse_step >= 0:1141 if self.parse_step == 4:1142 if tag == 'table':1143 self.parse_step = -11144 # データ取得1145 if len(self.tr_data) >= 2:1146 tr_inx = 01147 for td_data in self.tr_data:1148 if tr_inx > 0:1149 self.tatedama.append((td_data[searchTdDataTag(self.tr_data[0], '建日')],1150 float(td_data[searchTdDataTag(self.tr_data[0], '建単価')]),1151 int(deleteComma(td_data[searchTdDataTag(self.tr_data[0], 
'建株数')]).decode('utf-8').replace(u'株', '')),1152 td_data[searchTdDataTag(self.tr_data[0], '数量')]))1153 tr_inx += 11154 elif self.parse_step == 5:1155 if tag == 'tr':1156 self.parse_step = 41157 if not self.tr_data:1158 self.tr_data.append(self.td_data[:])1159 del self.td_data[:]1160 return1161 else:1162 qa_inx = searchTdDataTag(self.tr_data[0], '数量')1163 if qa_inx >= 0:1164 self.td_data[qa_inx] = self.quantity1165 if len(self.td_data) == len(self.tr_data[0]):1166 self.tr_data.append(self.td_data[:])1167 del self.td_data[:]1168 return1169 # error発生1170 self.b_result = False1171 self.parse_step = -11172 elif self.parse_step == 6:1173 if tag == 'td':1174 self.parse_step = 51175 self.td_data.append(self.str_work)1176 1177# @brief 建玉リストを切り出す(SBI-mobile[バックアップ]サイト用)1178# @param html_u8 buyHOrderEntryTateList.doのresponse(html/utf-8)1179# @return (結果, code, caID, list(建日, 単価, 株数, frame名))1180def responseRepLeverageStockOrderTateList(html_u8):1181 parser = RepOrderTateListParser()1182 parser.feed(html_u8)1183 parser.close()1184 1185 return (parser.b_result,1186 parser.code,1187 parser.caIQ,1188 parser.tatedama)1189# @brief [Debug]Shift-JISで送られてきたhtmlをUTF8に変換してからファイル出力する1190# @param html_sjis response(html/Shift-JIS)1191def debugOutputShiftJisHTMLToFile(html_sjis, filename):1192 f = open(filename, 'w')1193 f.write(html_sjis.decode('cp932').encode('utf-8'))1194 f.close()1195# @brief [Debug]UTF8で送られてきたhtmlをそのままファイル出力する1196# @param html_u8 response(html/UTF-8)1197def debugOutputHTMLToFile(html_u8, filename):1198 f = open(filename, 'w')1199 f.write(html_u8)1200 f.close()1201'''1202if __name__ == "__main__":1203 1204 url = "https://k.sbisec.co.jp/bsite/visitor/loginUserCheck.do"1205 request = urllib2.Request(url)1206 request.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8');1207 request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8');1208 request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; 
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36');1209 response = urllib2.urlopen(request)1210 1211 print responseLoginMobile(response.read())1212 1213 response.close()1214 1215if __name__ == "__main__":1216 1217 strbuff = open('mobile_login_ok.html').read()1218 print responseLoginMobile(strbuff)1219if __name__ == "__main__":1220 strbuff = open('test.html').read()1221 for elem in getPortfolioMobile(strbuff):1222 print elem1223if __name__ == "__main__":1224 strbuff = raw_input('>> ')1225 print responseGetEntranceOfPortfolioTransmission(strbuff)1226if __name__ == "__main__":1227 strbuff = open('portfolio.html').read()1228 print getPortfolioPC_Owned(strbuff)1229if __name__ == "__main__":1230 strbuff = open('order_list4.html').read()1231 ret = getTodayExecInfo(strbuff)1232 print ret[0]1233 for w in ret[1]:1234 print w1235if __name__ == "__main__":1236 strbuff = open('buy_order_ex.html').read()1237 print responseStockOrderExec(strbuff)1238if __name__ == "__main__":1239 strbuff = open('buy_order_entry.html').read()1240 print getStockOrderRegistID(strbuff, 1)...

Full Screen

Full Screen

analysis.py

Source:analysis.py Github

copy

Full Screen

1#! /usr/bin/env python32import os3import sys4import re5from datetime import datetime6import argparse7from prettytable import PrettyTable8TIME_FORMAT = "%m/%d %H:%M:%S"9PHASE1_WEIGHT = 43.8810PHASE2_WEIGHT = 19.6411PHASE3_WEIGHT = 33.7012PHASE4_WEIGHT = 2.7813TMP_DIRS = r"^Starting plotting progress into temporary dirs: (.*) and (.*)"14PLOT_ID = r"^ID: (.+)"15PLOT_SIZE = r"^Plot size is: (\d+)"16BUFFER_SIZE = r"^Buffer size is: (\d+)MiB"17BUCKETS = r"^Using (\d+) buckets"18THREADS = r"^Using (\d+) threads of stripe size (\d+)"19START_TIME = r"^Starting phase 1/4: Forward Propagation into tmp files\.\.\. (.+)"20PHASE_1 = r"^Time for phase 1 = (\d+\.\d+) seconds. CPU \((\d+\.\d+)%\)"21PHASE_2 = r"^Time for phase 2 = (\d+\.\d+) seconds. CPU \((\d+\.\d+)%\)"22PHASE_3 = r"^Time for phase 3 = (\d+\.\d+) seconds. CPU \((\d+\.\d+)%\)"23PHASE_4 = r"^Time for phase 4 = (\d+\.\d+) seconds. CPU \((\d+\.\d+)%\)"24TOTAL_TIME = r"^Total time = (\d+\.\d+) seconds. CPU \((\d+\.\d+)%\)"25COPY_TIME = r"^Copy time = (\d+\.\d+) seconds.*\) (.*)"26FILENAME = r"^Renamed final file from \".+\" to \"(.+)\""27MY_START = r"start new plot: (.*)"28MADMAX_TITLE = r"^Multi-threaded pipelined Chia"29MM_DIR = r"Final Directory: (.*)"30MM_TMP1 = r"Working Directory:\s+(.*)"31MM_TMP2 = r"Working Directory 2:\s+(.*)"32MM_NUM = r"Number of Plots: (\d+)"33MM_ID = r"Process ID: (\d+)"34MM_THREADS = r"Number of Threads: (\d+)"35MM_BUCKETS = r"Number of Buckets P1:.*\((.*)\)"36MM_START = r"Plot Name: "37MM_P1_TIME = r"\[P1\] Table.*took (\d+)"38MM_P1_END = r"Phase 1 took (\d+)"39MM_P2_TIME = r"\[P2\] Table.*took (\d+)"40MM_P2_END = r"Phase 2 took (\d+)"41MM_P3_TIME = r"\[P3.*Table.*took (\d+)"42MM_P3_END = r"Phase 3 took (\d+)"43MM_P4_END = r"Phase 4 took (\d+)"44MM_PARSE_END = r"Total plot creation time was (\d+)"45MM_COPY_START = r"^Started copy to (.*)"46MM_COPY_END = r"^Copy to.*finished, took (\d+)"47MM_FILENAME = r"^Renamed final plot to (.*)"48plot_list = []49progress_sum = 050plot_type = 
"madmax"51class plot_data:52 def __init__(self):53 self.is_madmax = 054 self.tmp_dir1 = ""55 self.tmp_dir2 = ""56 self.plot_id = ""57 self.plot_size = ""58 self.buffer_size = ""59 self.buckets = 060 self.threads = ""61 self.stripe_size = ""62 self.start_time = datetime.now()63 self.phase1_time = 0.064 self.phase2_time = 0.065 self.phase3_time = 0.066 self.phase4_time = 0.067 self.phase1_cpu = ""68 self.phase2_cpu = ""69 self.phase3_cpu = ""70 self.phase4_cpu = ""71 self.total_time = 0.072 self.totalCpu = ""73 self.copy_time = 0.074 self.filename = ""75 self.progress = 0.076 self.elapsed_time = 077 self.phase1_weight = 0.078 self.phase2_weight = 0.079 self.phase3_weight = 0.080 self.phase4_weight = 0.081def open_log(file):82 new_plot = plot_data()83 parse_step = 084 percent = 085 with open(file, "r") as f:86 lines = f.readlines()87 for line in lines:88 if (ret := re.search(MY_START, line)) != None:89 new_plot.start_time = datetime.strptime(ret.group(1), "%Y/%m/%d-%H:%M")90 elif (ret := re.search(TMP_DIRS, line)) != None:91 plot_list.append(new_plot)92 new_plot.tmp_dir1 = ret.group(1)93 new_plot.tmp_dir2 = ret.group(2)94 new_plot.is_madmax = 095 elif (ret := re.search(MADMAX_TITLE, line)) != None:96 plot_list.append(new_plot)97 new_plot.is_madmax = 198 new_plot.plot_size = "32"99 new_plot.buffer_size = "madmax"100 new_plot.elapsed_time = 0101 if new_plot.is_madmax:102 if (ret := re.search(MM_ID, line)) != None:103 new_plot.plot_id = ret.group(1)104 elif (ret := re.search(MM_TMP1, line)) != None:105 new_plot.tmp_dir1 = ret.group(1)106 elif (ret := re.search(MM_TMP2, line)) != None:107 new_plot.tmp_dir2 = ret.group(1)108 elif (ret := re.search(MM_THREADS, line)) != None:109 new_plot.threads = ret.group(1)110 elif (ret := re.search(MM_BUCKETS, line)) != None:111 new_plot.buckets = int(ret.group(1))112 elif (ret := re.search(MM_START, line)) != None:113 parse_step = 1114 new_plot.progress = 0115 percent = PHASE1_WEIGHT / 7;116 elif (ret := re.search(MM_P1_TIME, line)) != 
None:117 new_plot.progress += percent118 elif (ret := re.search(MM_P1_END, line)) != None:119 parse_step = 2120 new_plot.phase1_time = int(ret.group(1))121 new_plot.progress = PHASE1_WEIGHT122 percent = PHASE2_WEIGHT / 12123 elif (ret := re.search(MM_P2_TIME, line)) != None:124 new_plot.progress += percent125 elif (ret := re.search(MM_P2_END, line)) != None:126 parse_step = 3127 new_plot.phase2_time = int(ret.group(1))128 new_plot.progress = PHASE1_WEIGHT + PHASE2_WEIGHT129 percent = PHASE3_WEIGHT / 12130 elif (ret := re.search(MM_P3_TIME, line)) != None:131 new_plot.progress += percent132 new_plot.phase3_time = int(ret.group(1))133 elif (ret := re.search(MM_P3_END, line)) != None:134 parse_step = 4135 new_plot.phase3_time = int(ret.group(1))136 new_plot.progress = PHASE1_WEIGHT + PHASE2_WEIGHT + PHASE3_WEIGHT137 percent = PHASE4_WEIGHT / 4138 elif (ret := re.search(MM_P4_END, line)) != None:139 parse_step = 5140 new_plot.phase4_time = int(ret.group(1))141 elif (ret := re.search(MM_PARSE_END, line)) != None:142 parse_step = 5143 new_plot.elapsed_time = int(ret.group(1))144 new_plot.total_time = new_plot.elapsed_time145 new_plot.phase1_weight = new_plot.phase1_time / new_plot.total_time * 100146 new_plot.phase2_weight = new_plot.phase2_time / new_plot.total_time * 100147 new_plot.phase3_weight = new_plot.phase3_time / new_plot.total_time * 100148 new_plot.phase4_weight = new_plot.phase4_time / new_plot.total_time * 100149 elif (ret := re.search(MM_COPY_START, line)) != None:150 new_plot.filename = os.path.split(ret.group(1))[1]151 elif (ret := re.search(MM_COPY_END, line)) != None:152 parse_step = 0153 new_plot.progress = 100154 new_plot.copy_time = int(ret.group(1))155 new_plot = plot_data()156 elif (ret := re.search(MM_FILENAME, line)) != None:157 parse_step = 0158 new_plot.progress = 100159 new_plot = plot_data()160 if parse_step < 5:161 new_plot.elapsed_time = int((datetime.now() - new_plot.start_time).total_seconds())162 else:163 if (ret := re.search(PLOT_ID, 
line)) != None:164 new_plot.plot_id = ret.group(1)165 elif (ret := re.search(PLOT_SIZE, line)) != None:166 new_plot.plot_size = ret.group(1)167 elif (ret := re.search(BUFFER_SIZE, line)) != None:168 new_plot.buffer_size = ret.group(1)169 elif (ret := re.search(BUCKETS, line)) != None:170 new_plot.buckets = int(ret.group(1))171 elif (ret := re.search(THREADS, line)) != None:172 new_plot.threads = ret.group(1)173 new_plot.stripe_size = ret.group(2)174 elif (ret := re.search(START_TIME, line)) != None:175 new_plot.start_time = datetime.strptime(ret.group(1), "%a %b %d %H:%M:%S %Y")176 parse_step = 1177 new_plot.progress = 0178 percent = PHASE1_WEIGHT / 7 / new_plot.buckets;179 elif (ret := re.search(COPY_TIME, line)) != None:180 new_plot.copy_time = float(ret.group(1))181 elif (ret := re.search(PHASE_1, line)) != None:182 new_plot.phase1_time = float(ret.group(1))183 new_plot.phase1_cpu = ret.group(2)184 parse_step = 2185 new_plot.progress = PHASE1_WEIGHT186 percent = PHASE2_WEIGHT / 7187 elif (ret := re.search(PHASE_2, line)) != None:188 new_plot.phase2_time = float(ret.group(1))189 new_plot.phase2_cpu = ret.group(2)190 parse_step = 3191 new_plot.progress = PHASE1_WEIGHT + PHASE2_WEIGHT192 percent = PHASE3_WEIGHT / 6193 elif (ret := re.search(PHASE_3, line)) != None:194 new_plot.phase3_time = float(ret.group(1))195 new_plot.phase3_cpu = ret.group(2)196 parse_step = 4197 percent = PHASE4_WEIGHT / new_plot.buckets;198 new_plot.progress = PHASE1_WEIGHT + PHASE2_WEIGHT + PHASE3_WEIGHT199 elif (ret := re.search(PHASE_4, line)) != None:200 new_plot.phase4_time = float(ret.group(1))201 new_plot.phase1_cpu = ret.group(2)202 elif (ret := re.search(TOTAL_TIME, line)) != None:203 new_plot.total_time = float(ret.group(1))204 new_plot.totalCpu = ret.group(2)205 parse_step = 5206 new_plot.elapsed_time = int(new_plot.total_time)207 new_plot.phase1_weight = new_plot.phase1_time / new_plot.total_time * 100208 new_plot.phase2_weight = new_plot.phase2_time / new_plot.total_time * 
100209 new_plot.phase3_weight = new_plot.phase3_time / new_plot.total_time * 100210 new_plot.phase4_weight = new_plot.phase4_time / new_plot.total_time * 100211 elif (ret := re.search(FILENAME, line)) != None:212 parse_step = 0213 new_plot.progress = 100214 new_plot.filename = os.path.split(ret.group(1))[1]215 new_plot = plot_data()216 if parse_step < 5:217 new_plot.elapsed_time = int((datetime.now() - new_plot.start_time).total_seconds())218 if parse_step == 1:219 if (ret := re.search(r"^\sBucket", line)) != None:220 new_plot.progress += percent221 elif parse_step == 2:222 if (ret := re.search(r"^Backpropagating", line)) != None:223 new_plot.progress += percent224 elif parse_step == 3:225 if (ret := re.search(r"^Compressing tables", line)) != None:226 new_plot.progress += percent227 elif parse_step == 4:228 if (ret := re.search(r"^\sBucket", line)) != None:229 new_plot.progress += percent230def conversion_float_time(time):231 try:232 s = int(time)233 if s == 0:234 return ""235 except ValueError:236 return ""237 m, s = divmod(s, 60)238 h, m = divmod(m, 60)239 return "{:02d}:{:02d}".format(h,m)240def analysis_log():241 if options.logdir:242 logdir = options.logdir243 else:244 logdir = os.getenv("CLOGS")245 if logdir == None:246 logdir = os.path.expanduser("~") + "/chialogs"247 tb = PrettyTable()248 title_list = ["k", "Buckets", "Thread", "TmpDir", "StartTime", "ElapsedTime", "PhaseTime", "Progress", "CopyTime"]249 if options.filename:250 title_list.append("PlotFileName")251 if options.phaseweight:252 title_list.append("PHaseWeight")253 tb.field_names = title_list.copy()254 for root,dirs,file_list in os.walk(logdir):255 if options.quicksearch:256 dirs[:] = []257 for filename in file_list:258 file = os.path.join(root, filename)259 open_log(file)260 average_phase1_weight = 0.0261 average_phase2_weight = 0.0262 average_phase3_weight = 0.0263 average_phase4_weight = 0.0264 for plot in plot_list:265 phase_time = conversion_float_time(plot.phase1_time)266 if 
plot.phase2_time:267 phase_time = phase_time + " / " + conversion_float_time(plot.phase2_time)268 if plot.phase3_time:269 phase_time = phase_time + " / " + conversion_float_time(plot.phase3_time)270 if plot.phase4_time:271 phase_time = phase_time + " / " + conversion_float_time(plot.phase4_time)272 progress = "{:.2f}".format(plot.progress) + "%"273 global progress_sum274 if options.progress:275 if plot.progress < int(options.progress):276 progress_sum += 1277 row = [plot.plot_size, plot.buckets, plot.threads, plot.tmp_dir1,278 plot.start_time.strftime(TIME_FORMAT),279 conversion_float_time(plot.elapsed_time),280 phase_time,281 progress,282 conversion_float_time(plot.copy_time),283 ]284 if options.filename:285 row.append(plot.filename)286 if options.phaseweight:287 if plot.phase4_weight != 0:288 average_phase1_weight = (average_phase1_weight + plot.phase1_weight)/2289 average_phase2_weight = (average_phase2_weight + plot.phase2_weight)/2290 average_phase3_weight = (average_phase3_weight + plot.phase3_weight)/2291 average_phase4_weight = (average_phase4_weight + plot.phase4_weight)/2292 phase_weight = "{:.2f} / {:.2f} / {:.2f} / {:.2f}".format(293 plot.phase1_weight,plot.phase2_weight,plot.phase3_weight,plot.phase4_weight)294 else:295 phase_weight = ""296 row.append(phase_weight)297 tb.add_row(row)298 print(tb.get_string(sortby="StartTime"))299 if options.phaseweight:300 average_phase_weight = "average phase weight: {:.2f} / {:.2f} / {:.2f} / {:.2f}".format(301 average_phase1_weight,average_phase2_weight,average_phase3_weight,average_phase4_weight)302 print(average_phase_weight)303 if options.progress:304 print("plot progress < {}% have {}".format(options.progress, progress_sum))305if __name__ == "__main__":306 title = "analysis"307 parse = argparse.ArgumentParser(title)308 parse.add_argument("-f", "--filename", action="store_true", help="display filename")309 parse.add_argument("-q", "--quicksearch", action="store_true", help="not search subdir")310 
parse.add_argument("-w", "--phaseweight", action="store_true", help="check parse weight")311 parse.add_argument("-p", "--progress", help="return progress < xx%% count")312 parse.add_argument("-d", "--logdir", help="set chia log dir (default: ~/chialogs)")313 options = parse.parse_args()314 if options.progress:315 options.quicksearch = True316 analysis_log()317 if options.progress:318 sys.exit(progress_sum)319 else:...

Full Screen

Full Screen

q2_parser_transitions.py

Source:q2_parser_transitions.py Github

copy

Full Screen

...5 self.stack = ["ROOT"]6 self.buffer = sentence[:]7 self.dependencies = []8 ### END YOUR CODE9 def parse_step(self, transition):10 ### YOUR CODE HERE11 if transition == "S":12 self.stack.append(self.buffer[0])13 self.buffer.pop(0)14 elif transition == "LA":15 self.dependencies.append((self.stack[-1], self.stack[-2]))16 self.stack.pop(-2)17 else:18 self.dependencies.append((self.stack[-2], self.stack[-1]))19 self.stack.pop(-1)20 ### END YOUR CODE21 def parse(self, transitions):22 for transition in transitions:23 self.parse_step(transition)24 return self.dependencies25def minibatch_parse(sentences, model, batch_size):26 ### YOUR CODE HERE27 partial_parses = [PartialParse(s) for s in sentences]28 unfinished_parse = partial_parses29 while len(unfinished_parse) > 0:30 minibatch = unfinished_parse[0:batch_size]31 while len(minibatch) > 0:32 transitions = model.predict(minibatch)33 for index, action in enumerate(transitions):34 minibatch[index].parse_step(action)35 minibatch = [parse for parse in minibatch if len(parse.stack) > 1 or len(parse.buffer) > 0]36 unfinished_parse = unfinished_parse[batch_size:]37 dependencies = []38 for n in range(len(sentences)):39 dependencies.append(partial_parses[n].dependencies)40 ### END YOUR CODE41 return dependencies42def test_step(name, transition, stack, buf, deps,43 ex_stack, ex_buf, ex_deps):44 """Tests that a single parse step returns the expected output"""45 pp = PartialParse([])46 pp.stack, pp.buffer, pp.dependencies = stack, buf, deps47 pp.parse_step(transition)48 stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies)))49 assert stack == ex_stack, \50 "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack)51 assert buf == ex_buf, \52 "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf)53 assert deps == ex_deps, \54 "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)55 print "{:} test passed!".format(name)56def 
test_parse_step():57 """Simple tests for the PartialParse.parse_step function58 Warning: these are not exhaustive59 """60 test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],61 ("ROOT", "the", "cat"), ("sat",), ())62 test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],63 ("ROOT", "cat",), ("sat",), (("cat", "the"),))64 test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],65 ("ROOT", "run",), (), (("run", "fast"),))66def test_parse():67 """Simple tests for the PartialParse.parse function68 Warning: these are not exhaustive69 """70 sentence = ["parse", "this", "sentence"]71 dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"])72 dependencies = tuple(sorted(dependencies))73 expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))74 assert dependencies == expected, \75 "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)76 assert tuple(sentence) == ("parse", "this", "sentence"), \77 "parse test failed: the input sentence should not be modified"78 print "parse test passed!"79class DummyModel:80 """Dummy model for testing the minibatch_parse function81 First shifts everything onto the stack and then does exclusively right arcs if the first word of82 the sentence is "right", "left" if otherwise.83 """84 def predict(self, partial_parses):85 return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S"86 for pp in partial_parses]87def test_dependencies(name, deps, ex_deps):88 """Tests the provided dependencies match the expected dependencies"""89 deps = tuple(sorted(deps))90 assert deps == ex_deps, \91 "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)92def test_minibatch_parse():93 """Simple tests for the minibatch_parse function94 Warning: these are not exhaustive95 """96 sentences = [["right", "arcs", "only"],97 ["right", "arcs", "only", "again"],98 ["left", "arcs", "only"],99 ["left", "arcs", "only", 
"again"]]100 deps = minibatch_parse(sentences, DummyModel(), 2)101 test_dependencies("minibatch_parse", deps[0],102 (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')))103 test_dependencies("minibatch_parse", deps[1],104 (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')))105 test_dependencies("minibatch_parse", deps[2],106 (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')))107 test_dependencies("minibatch_parse", deps[3],108 (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')))109 print "minibatch_parse test passed!"110if __name__ == '__main__':111 test_parse_step()112 test_parse()...

Full Screen

Full Screen

rarity.py

Source:rarity.py Github

copy

Full Screen

1import os2import glob3import json4import pathlib5import argparse6import traceback7import requests8import pandas as pd9from tqdm import tqdm10from multiprocessing import Pool, cpu_count11SAVE_PATH = 'base_rarity'12PARSE_STEP = 2013METADATA_PATH = None14RARITY_PATH = None15def download(download_arg):16 address, i = download_arg17 token_ids = list(range(i*PARSE_STEP, (i+1)*PARSE_STEP))18 # check if already downloaded19 if all([os.path.exists(os.path.join(METADATA_PATH, f"{x}.json")) for x in token_ids]):20 return False21 # download22 try:23 querystring = {"token_ids": token_ids,24 "offset": "0",25 "limit": "20",26 "asset_contract_address": address}27 r = requests.get("https://api.opensea.io/api/v1/assets", params=querystring)28 js = r.json()29 if len(js['assets']) == 0:30 return True31 for asset in js['assets']:32 save_d = {"token_id": asset['token_id'], "traits": asset.get('traits', [])}33 with open(os.path.join(METADATA_PATH, f"{asset['token_id']}.json"), "w") as f:34 json.dump(save_d, f)35 except Exception as e:36 print("Exception", e, traceback.format_exc())37 return False38class Rarity:39 def __init__(self):40 self.metas = glob.glob(os.path.join(METADATA_PATH, "*"))41 def _calc_rarity(self, row):42 res = 043 for c in self.trait_cols:44 if row[c] == -1:45 continue46 trait_count = self.traits_counter[c][row[c]]47 res += 1 / (trait_count / len(self.metas))48 return res49 def __call__(self):50 df = []51 for x in self.metas:52 js = json.load(open(x))53 d = {}54 for trait in js['traits']:55 d['trait_' + trait['trait_type']] = trait['value']56 d['token_id'] = int(os.path.split(x)[-1].split(".")[0])57 df.append(d)58 df = pd.DataFrame(df)59 df.fillna(-1, inplace=True)60 self.trait_cols = [x for x in df.columns if "trait_" in x]61 self.traits_counter = {}62 for c in self.trait_cols:63 self.traits_counter[c] = df.groupby(c).agg({"token_id": ['count']}).to_dict()[('token_id', 'count')]64 65 df['rarity_score'] = df.apply(lambda x: self._calc_rarity(x), axis=1)66 df = 
df.sort_values("rarity_score", ascending=False)67 df.to_csv(os.path.join(os.path.split(RARITY_PATH)[0], "rarity.csv"), index=False)68 print("df", df)69 for i, js in enumerate(df.to_dict('records')):70 with open(os.path.join(RARITY_PATH, f"{js['token_id']}.json"), "w") as f:71 js['rarity_place'] = i72 json.dump(js, f)73if __name__ == "__main__":74 print('START RARITY')75 parser = argparse.ArgumentParser()76 parser.add_argument('--contract', type=str, help='Address of contract, i.e. 0x34234...')77 parser.add_argument('--max_token_id', type=int, help='Max token id')78 args = parser.parse_args()79 # create dirs80 METADATA_PATH = os.path.join(SAVE_PATH, args.contract, "metadata")81 pathlib.Path(METADATA_PATH).mkdir(exist_ok=True, parents=True)82 RARITY_PATH = os.path.join(SAVE_PATH, args.contract, "rarity")83 pathlib.Path(RARITY_PATH).mkdir(exist_ok=True, parents=True)84 # calculate args85 N = args.max_token_id // PARSE_STEP + 186 map_args = [(args.contract, i) for i in range(0, N)]87 # download in multiple treads will only work with proxies88 # pool = Pool(cpu_count())89 # download_func = partial(download)90 # results = pool.map(download_func, map_args)91 # pool.close()92 # pool.join()93 # download in 1 thread94 for arg in tqdm(map_args):95 is_last = download(arg)96 if is_last:97 break98 # get_rarity99 rarity = Rarity()...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run Behave automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful