Best Python code snippet using stestr_python
modifySequence.py
Source:modifySequence.py  
1from utils import get_code_txt2from utils import CONFIG3from utils import create_connection4from utils import check_iloc5import numpy as np6import pymysql7import pandas as pd8import pprint9import os10import ast11# from testRunning import running12def found_dataset(old_path, notebook_id, root_path, origin_code):13    """14    :param old_path:15    :param notebook_id:16    :param root_path:17    :param origin_code:18    :return:19    妿è¿è¡æ¶åç°è·¯å¾ä¸å¯¹ï¼æ¾å°éè¦æ¿æ¢çè·¯å¾20    """21    old_root_path = ''22    if '/' not in old_path:23        result = root_path + '/' + old_path24        old_root_path = old_path25    else:26        for index, i in enumerate(old_path.split('/')):27            if index != len(old_path.split('/')) - 1:28                old_root_path = old_root_path + i + '/'29            else:30                if '.' not in i:31                    old_root_path = old_root_path + i32                if '/' == old_root_path[-1]:33                    old_root_path = old_root_path[0:-1]34        result = root_path35    print('old_root_path', old_root_path)36    print("result", result)37    return origin_code.replace(old_root_path, result)38def running_temp_code(func_def, new_path,count, found=False):39    """40    :param func_def: éè¦è¿è¡ç代ç å符串41    :param new_path: æ¿æ¢è·¯å¾42    :param count: ç¬¬å æ¬¡è¿è¡äº43    :return: è¿åä¿®æ¹è¿åæè
æåè¿è¡ç代ç 44    è¿è¡ä»£ç 45    """46    try:47        cm = compile(func_def, '<string>', 'exec')48    except Exception as e:49        print("compile fail", e)50        return "compile fail"51    print("\033[0;33;40m" + str(count) +"\033[0m")52    can_run = False53    try:54        namespace = {}55        exec(cm,namespace)56        print("\033[0;32;40msucceed\033[0m")57        can_run = True58        # return 'succeed'59    except Exception as e:60        # traceback.print_exc()61        error_str = str(e)62        new_code = func_def63        foun = 064        # traceback.print_exc()65        # print("\033[0;31;40merror_str\033[0m", error_str)66        # print("\033[0;31;40merror_str\033[0m", error_str)67        if "[Errno 2] No such file or directory: " in error_str:68            error_path = error_str.replace("[Errno 2] No such file or directory: " , "")69            error_path = error_path[1:-1]70            new_code = found_dataset(error_path, 1, new_path, func_def)71            # print('error_path:', error_path)72            foun=173            print('error 1')74            # running(new_code)75        elif "does not exist:" in error_str and '[Errno 2] File ' in error_str:76            error_path = error_str.split(':')[-1].strip()77            error_path = error_path[1:-1]78            new_code = found_dataset(error_path, 1, new_path, func_def)79            # print('error_path:', error_path)80            # print('new_code:', new_code)81            print("\033[0;31;40merror_str\033[0m", error_str)82            print('error 2')83            foun=184        # elif "No module named " in error_str and '_tkinter' not in error_str:85        #     package = error_str.replace("No module named ", "")86        #     package = package[1:-1]87        #     # command = ' pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple ' + package.split('.')[0]88        #     # os.system(command)89        #     command = ' pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ' + package.split('.')[0] + ' --trusted-host pypi.tuna.tsinghua.edu.cn'90        #     # command = ' pip install ' + package.split('.')[0]91        #     os.system(command)92        #     print('error 3')93        elif  ": No such file or directory" in error_str:94            index1 = error_str.find("'")95            index2 = error_str.find("'", index1+1)96            error_path = error_str[index1+1:index2]97            new_code = found_dataset(error_path, 1, new_path, func_def)98            # print('error_path:', error_path)99            print('error 4')100        elif "Command '['ls'," in error_str:101            index1 = error_str.find('ls')102            # print(index1)103            el_line = error_str[index1+6:]104            # print(el_line)105            right_index  = el_line.find('\'')106            error_path = el_line[0:right_index]107            # print('error_path:', error_path)108            new_code = found_dataset(error_path, 1, new_path, func_def)109            # print('new_code:', new_code)110            # print("\033[0;31;40merror_str\033[0m", error_str)111            foun = 1112            print('error 5')113        elif "File b" in error_str:114            index1 = error_str.find("'")115            index2 = error_str.find("'", index1 + 1)116            error_path = error_str[index1 + 1:index2]117            new_code = found_dataset(error_path, 1, new_path, func_def)118            # print('error_path:', error_path)119            print('error 10')120            foun = 1121            print('error 5')122        elif "'DataFrame' object has no attribute 'ix'" in error_str or "'Series' object has no attribute 'ix'" in error_str:123            new_code = func_def.replace('.ix', '.iloc')124            print('error 6')125        elif "'DataFrame' object has no attribute 'sort'" in error_str:126            new_code = func_def.replace('.sort(', '.sort_values(')127            print('error 7')128        elif "dlopen: cannot load any more object with static TLS" in error_str:129            print("\033[0;31;40merror_str\033[0m", error_str)130            return 'break'131        else:132            # print("?")133            # traceback.print_exc()134            print("\033[0;31;40merror_str\033[0m", error_str)135            print('error 8')136            return 'error 8' + error_str137        if count < 7:138            # print(new_code)139            if foun ==1:140                found = True141            code_list = new_code.split('\n')142            res = running_temp_code(new_code, new_path, count + 1,found)143            if res == 'compile fail' or res== 'False':144                return res145            if res[0:7] == 'error 8':146                return res147            # return res148        else:149            print('error 9')150            return "error 8"151    return func_def152def get_operator_code(notebook_id, notebook_code, change_rank, ope_dic, get_min_max=0):153    code_list = notebook_code.split('\n')154    # for index, line in enumerate(code_list):155    #     print("\033[0;39;41m" + str(index) + ':' + line + "\033[0m")156    cursor, db = create_connection()157    try:158        walk_logs = np.load('../walklogs/' + str(notebook_id) + '.npy', allow_pickle=True).item()159    except:160        walk_logs = []161    sql = "select operator,data_object_value,data_object from operator where rank=" + str(change_rank) + " and notebook_id=" + str(162        notebook_id)163    cursor.execute(sql)164    sql_res = cursor.fetchall()165    operation = ''166    data_object_value = ''167    est_value = ''168    for row in sql_res:169        operation = row[0]170        data_object_value = row[1]171        dj = row[2]172    # print('operation:', operation)173    # print(len(operation))174    check_result = 0175    if 'iloc' in data_object_value and operation == 'iloc':176        check_result = check_iloc(data_object_value)177    # print(check_result)178    if check_result != 2 and check_result != 0:179        return [[[0,-1]]]180    if operation == '':181        return 'no such operator'182    if data_object_value[0] == '(' and data_object_value[-1] == ')':183        data_object_value = data_object_value[1:-1]184    candidate = []185    if data_object_value ==  "raw_data[(raw_data.region == 46)].sort_values(by='Date').set_index('Date').drop(columns=['AveragePrice'])":186        data_object_value = "raw_data[raw_data.region==46].sort_values(by='Date').set_index('Date').drop(columns=['AveragePrice'])"187    elif data_object_value ==  "raw_data[(raw_data.region == 46)].sort_values(by='Date').set_index('Date').drop":188        data_object_value = "raw_data[raw_data.region==46].sort_values(by='Date').set_index('Date').drop"189    # print('data_object_value:', data_object_value)190    temp_data_object_value = data_object_value.replace(' ', '')191    temp_data_object_value1 = data_object_value.replace('(- 1)','-1')192    temp_data_object_value2 = temp_data_object_value1.replace(' ','')193    # print('temp_data_object_value1:',temp_data_object_value1)194    # print('temp_data_object_value2:',temp_data_object_value2)195    if ope_dic[operation]['call_type'] == 0 or ope_dic[operation]['call_type'] == 2 or ope_dic[operation][196        'call_type'] == 4:197        count = 0198        for i in code_list:199            if len(i) > 0:200                if i[0] == '#':201                    count += 1202                    continue203            # print(operation,i)204            temp_code = i.replace('"','\'')205            temp_code = temp_code.replace(' ','')206            if (data_object_value in i or temp_data_object_value in i207                or data_object_value in i.replace('"','\'') or temp_data_object_value in i.replace('"','\'')208                or data_object_value in temp_code or temp_data_object_value in temp_code or temp_data_object_value1 in i or temp_data_object_value2 in i) and operation in i:209                # print('i:',i)210                if temp_data_object_value in i:211                    data_object_value = temp_data_object_value212                if temp_data_object_value1 in i:213                    data_object_value = temp_data_object_value1214                if temp_data_object_value2 in i:215                    data_object_value = temp_data_object_value2216                candidate.append((i, count))217            count += 1218    elif ope_dic[operation]['call_type'] == 3:219        # print(walk_logs["estiminator_values"])220        if "estiminator_values" in walk_logs:221            if operation in walk_logs['estiminator_values']:222                est_value = walk_logs["estiminator_values"][operation]223            else:224                est_value = ''225        else:226            est_value = ''227        # print('est_value', est_value)228        count = 0229        for i in code_list:230            if len(i) > 0:231                if i[0] == '#':232                    count += 1233                    continue234            temp_code = i.replace('"', '\'')235            temp_code = temp_code.replace(' ', '')236            if est_value in i and (data_object_value in i or temp_data_object_value in i237                or data_object_value in i.replace('"','\'') or temp_data_object_value in i.replace('"','\'')238                or data_object_value in temp_code or temp_data_object_value in temp_code or temp_data_object_value1 in i or temp_data_object_value2 in i) and (239                    'fit_transform' in i or 'transform' in i):240                if temp_data_object_value in i:241                    data_object_value = temp_data_object_value242                if temp_data_object_value1 in i:243                    data_object_value = temp_data_object_value1244                if temp_data_object_value2 in i:245                    data_object_value = temp_data_object_value2246                candidate.append((i, count))247                # print(operation,count)248            elif operation in i and (data_object_value in i or temp_data_object_value in i249                or data_object_value in i.replace('"','\'') or temp_data_object_value in i.replace('"','\'')250                or data_object_value in temp_code or temp_data_object_value in temp_code or temp_data_object_value1 in i or temp_data_object_value2 in i) and (251                    'fit_transform' in i or 'transform' in i):252                if temp_data_object_value in i:253                    data_object_value = temp_data_object_value254                if temp_data_object_value1 in i:255                    data_object_value = temp_data_object_value1256                if temp_data_object_value2 in i:257                    data_object_value = temp_data_object_value2258                candidate.append((i, count))259                # print(operation, count)260            if candidate == []:261                if i and (data_object_value in i or temp_data_object_value in i262                                       or data_object_value in i.replace('"',263                                                                         '\'') or temp_data_object_value in i.replace(264                            '"', '\'')265                                       or data_object_value in temp_code or temp_data_object_value in temp_code or temp_data_object_value1 in i or temp_data_object_value2 in i) and (266                        'fit_transform' in i or 'transform' in i):267                    if temp_data_object_value in i:268                        data_object_value = temp_data_object_value269                    if temp_data_object_value1 in i:270                        data_object_value = temp_data_object_value1271                    if temp_data_object_value2 in i:272                        data_object_value = temp_data_object_value2273                    candidate.append((i, count))274                    # print(operation,count)275            count += 1276    elif ope_dic[operation]['call_type'] == 5:277        # print(walk_logs["estiminator_values"])278        count = 0279        for i in code_list:280            if len(i) > 0:281                if i[0] == '#':282                    count += 1283                    continue284            # print(operation,i)285            temp_code = i.replace('"', '\'')286            temp_code = temp_code.replace(' ', '')287            if (data_object_value in i or temp_data_object_value in i or data_object_value in i.replace('"',288                                                                                                        '\'') or temp_data_object_value in i.replace(289                    '"',290                    '\'') or data_object_value in temp_code or temp_data_object_value in temp_code or temp_data_object_value1 in i or temp_data_object_value2 in i) and operation in i:291                # print('i:',i)292                if temp_data_object_value in i:293                    data_object_value = temp_data_object_value294                if temp_data_object_value1 in i:295                    data_object_value = temp_data_object_value1296                if temp_data_object_value2 in i:297                    data_object_value = temp_data_object_value2298                candidate.append((i, count))299            count += 1300    # print('???')301    # print(candidate)302    # print('min_max:',get_min_max)303    if get_min_max == 0:304        if len(candidate) > 1:305            if change_rank > 1:306                last_get = get_operator_code(notebook_id, notebook_code, change_rank - 1, ope_dic, get_min_max=1)307                if type(last_get).__name__ != 'str':308                    min = last_get[0]309                    if last_get[0][0][1] == -1:310                        min = last_get[0]311                else:312                    min = [(0, 0)]313                print('last_get:', last_get)314            else:315                min = [(0, 0)]316            next_get = get_operator_code(notebook_id, notebook_code, change_rank + 1, ope_dic, get_min_max=2)317            # print(next_get)318            if type(next_get).__name__ == 'str':319                max = [(0, 1000)]320            elif next_get[0][0][1] == -1:321                max = [(0, 1000)]322            else:323                max = next_get[0]324            print('max:', max)325            print('min:', min)326            print('candidate:', candidate)327            temp_candicate = []328            for i in candidate:329                if i[1] >= min[0][1] and i[1] <= max[0][1]:330                    temp_candicate.append(i)331            if len(temp_candicate) == 0:332                for i in candidate:333                    if i[1] <= max[0][1]:334                        temp_candicate.append(i)335            candidate = [temp_candicate[-1]]336        elif len(candidate) == 0:337            return 'no such operator'338        return candidate, operation, data_object_value, est_value,dj339    elif get_min_max == 1:340        # print('1_candidate:', candidate)341        if len(candidate) > 1:342            # print('candidate:', candidate)343            if change_rank > 1:344                last_get = get_operator_code(notebook_id, notebook_code, change_rank - 1, ope_dic, get_min_max=1)345                print('last_get:', last_get)346                if type(last_get).__name__ != 'str':347                    min = last_get[0]348                    if last_get[0][0][1] == -1:349                        min = last_get[0]350                else:351                    min = [(0, 0)]352            else:353                min = [(0, 0)]354            temp_candicate = []355            count = 0356            for i in candidate:357                count += 1358                # print('count:', count)359                # print('min:',min)360                # print('type:min:', type(min))361                # print('i[1]:',i[1])362                if i[1] > min[0][1]:363                    temp_candicate.append(i)364            # print('len(:',len(temp_candicate))365            if len(temp_candicate) == 0:366                temp_candicate = min367            candidate = [temp_candicate[0]]368            # print('return:', candidate)369        elif len(candidate) == 0:370            return 'no such operator'371        return candidate, operation, data_object_value, est_value,dj372    elif get_min_max == 2:373        # print('2_candidate:', candidate)374        if len(candidate) > 1:375            # print('candidate:', candidate)376            if change_rank > 1:377                last_get = get_operator_code(notebook_id, notebook_code, change_rank + 1, ope_dic, get_min_max=1)378                print('last_get:', last_get)379                if type(last_get).__name__ != 'str':380                    max = last_get[0]381                    if last_get[0][0][1] == -1:382                        max = last_get[0]383                else:384                    max = [(0, 1000)]385            else:386                max = [(0, 1000)]387            temp_candicate = []388            for i in candidate:389                if i[1] < max[0][1]:390                    temp_candicate.append(i)391            if len(temp_candicate) == 0:392                temp_candicate = max393            candidate = [temp_candicate[-1]]394        elif len(candidate) == 0:395            return 'no such operator'396        return candidate, operation, data_object_value, est_value,dj397def get_result_code(notebook_id, notebook_code, result_rank, get_min_max=0):398    def delete_error_tuple():399        cursor, db = create_connection()400        sql = "select id from result where notebook_id=" + str(notebook_id)401        cursor.execute(sql)402        sql_res = cursor.fetchall()403        count = 1404        need_delete_id = -1405        for row in sql_res:406            if count != result_rank:407                count += 1408                continue409            need_delete_id = row[0]410            break411        if need_delete_id != -1:412            sql = 'delete from result where id=' + str(need_delete_id)413        cursor.execute(sql)414        db.commit()415    code_list = notebook_code.split('\n')416    # for index, line in enumerate(code_list):417    #     print("\033[0;35;40m" + str(index) + ':' + line + "\033[0m")418    cursor, db = create_connection()419    sql = "select code from result where notebook_id=" + str(notebook_id)420    cursor.execute(sql)421    sql_res = cursor.fetchall()422    data_object_value = ''423    est_value = ''424    count = 1425    code = ''426    for row in sql_res:427        if count != result_rank:428            count += 1429            continue430        code = row[0]431        break432    if code == '':433        return 'no such result'434    candidate = []435    count = 0436    # print(code)437    for i in code_list:438        if len(i) > 0:439            if i[0] == '#':440                count += 1441                continue442        if code in i.replace(' ',''):443            candidate.append((i, count))444        count += 1445    # print('cadidate:',candidate)446    if candidate == []:447        return 'no such result'448    if get_min_max == 0:449        if len(candidate) > 1:450            # print('candidate:', candidate)451            if result_rank > 1:452                last_get = get_result_code(notebook_id, notebook_code, result_rank - 1, get_min_max=1)453                if last_get != 'no such result':454                    min = last_get455                else:456                    min = [(0, 0)]457            else:458                min = [(0, 0)]459            next_get = get_result_code(notebook_id, notebook_code, result_rank + 1, get_min_max=2)460            if next_get == 'no such result':461                max = [(0, 1000)]462            else:463                max = next_get464            # print('min:',min)465            # print('max:',max)466            if max[0][1] < min[0][1]:467                delete_error_tuple()468                temp_candicate = []469                for i in candidate:470                    if i[1] >= min[0][1]:471                        temp_candicate.append(i)472            else:473                temp_candicate = []474                for i in candidate:475                    # print(i)476                    if i[1] >= min[0][1] and i[1] <= max[0][1]:477                        temp_candicate.append(i)478            candidate = [temp_candicate[0]]479        return candidate480    elif get_min_max == 1:481        # print('1_candidate:', candidate)482        if len(candidate) > 1:483            # print('candidate:', candidate)484            if result_rank > 1:485                last_get = get_result_code(notebook_id, notebook_code, result_rank - 1, get_min_max=1)486                if last_get != 'no such result':487                    min = last_get488                else:489                    min = [(0, 0)]490            else:491                min = [(0, 0)]492            temp_candicate = []493            for i in candidate:494                if i[1] > min[0][1]:495                    temp_candicate.append(i)496            if temp_candicate == []:497                return 'no such result'498            candidate = [temp_candicate[0]]499        return candidate500    elif get_min_max == 2:501        # print('2_candidate:', candidate)502        if len(candidate) > 1:503            # print('candidate:', candidate)504            if result_rank > 1:505                last_get = get_result_code(notebook_id, notebook_code, result_rank + 1, get_min_max=1)506                if last_get != 'no such result':507                    max = last_get508                else:509                    max = [(0, 1000)]510            else:511                max = [(0, 1000)]512            temp_candicate = []513            for i in candidate:514                if i[1] < max[0][1]:515                    temp_candicate.append(i)516            if temp_candicate == []:517                return 'no such result'518            candidate = [temp_candicate[-1]]519        return candidate520def changeOperator(notebook_id, change_rank, target_content, notebook_root_path='../notebook/'):521    """522    :param notebook_id:523    :param notebook_root_path:524    :param change_rank:525    :param target_content: {526        operation: '',527        ope_type: 1,528        parameters: [],529        }530    :return:531    """532    ope_dic = eval(CONFIG.get('operators', 'operations'))533    notebook_path = notebook_root_path + str(notebook_id) + '.ipynb'534    notebook_code = get_code_txt(notebook_path)535    # print(notebook_code)536    res = get_operator_code(notebook_id,notebook_code,change_rank,ope_dic)537    if res == 'no such operator':538        return res539    candidate_code_list =res[0]540    operation =res[1]541    data_object_value =res[2]542    est_value =res[3]543    print(candidate_code_list)544    # if len(candidate_code_list) == 1:545    candidate_code = candidate_code_list[0][0]546    line_number = candidate_code_list[0][1]547    print(candidate_code)548    print(line_number)549    need_replace = ''550    data_object = ''551    call_type = ope_dic[operation]['call_type']552    if call_type == 0:553        data_object = data_object_value[0:data_object_value.find(operation)-1]554        # print(candidate_code.find(data_object_value))555        need_code =  candidate_code[candidate_code.find(data_object_value):]556        print('need_code:',need_code)557        operation_index = need_code.find(operation)558        code1 = need_code[0:operation_index]559        print("code1:",code1)560        need_code = need_code[operation_index:]561        left_index = need_code.find('(')562        ind = left_index+1563        left_count = 1564        while left_count!=0:565            if need_code[ind] == '(':566                left_count += 1567            elif need_code[ind] == ')':568                left_count -= 1569            ind += 1570        print("need_code:", need_code[0:ind])571        need_replace = code1 + need_code[0:ind]572    elif call_type == 2 or call_type == 4:573        data_object = data_object_value574        need_code_index = candidate_code.find(operation)575        head = need_code_index576        prefix = ''577        if need_code_index > 1:578            if candidate_code[need_code_index-1] == '.':579                head = need_code_index -2580                while candidate_code[head].isalnum():581                    head -= 1582                prefix = candidate_code[head+1:need_code_index]583        need_code = candidate_code[need_code_index:]584        left_index = need_code.find('(')585        ind = left_index + 1586        left_count = 1587        while left_count != 0:588            if need_code[ind] == '(':589                left_count += 1590            elif need_code[ind] == ')':591                left_count -= 1592            ind += 1593        need_replace = prefix + need_code[0:ind]594    elif call_type == 3:595        if operation in candidate_code:596            head = candidate_code.find(operation)597        elif est_value in candidate_code:598            head = candidate_code.find(operation)599        else:600            return 'no estiminator'601        need_code = candidate_code[head:]602        if 'fit_transform' in candidate_code:603            fit_index = need_code.find('fit_transform')604        elif 'transform' in candidate_code:605            fit_index = need_code.find('transform')606        else:607            return 'no transform function'608        prefix = need_code[0:fit_index]609        need_code = need_code[fit_index:]610        left_index = need_code.find('(')611        ind = left_index + 1612        left_count = 1613        while left_count != 0:614            if need_code[ind] == '(':615                left_count += 1616            elif need_code[ind] == ')':617                left_count -= 1618            ind += 1619        need_replace = prefix + need_code[0:ind]620        data_object = data_object_value621    if 'data_object' in target_content.keys():622        if target_content['data_object'] !=  '':623            data_object = target_content['data_object']624    if ('+' in data_object or '-' in data_object or '*' in data_object or '/' in data_object) \625            and not (data_object[0] == '(' and data_object[-1] == ')'):626        data_object = '(' + data_object + ')'627    if need_replace != '' and data_object != '':628        param_code = ''629        for index,param in enumerate(target_content['parameters']):630            param_code += str(param)631            if index != len(target_content['parameters'])-1:632                param_code += ','633        if target_content['ope_type'] == 0:634            new_code_line = data_object + '.' + target_content['operation'] + '(' + param_code + ')'635            package_code = 'import pandas as pd\n'636        elif target_content['ope_type'] == 2:637            if param_code != '':638                new_code_line = 'pd.' + target_content['operation'] + '(' + data_object + ',' + param_code + ')'639            else:640                new_code_line = 'pd.' + target_content['operation'] + '(' + data_object + ')'641            package_code = 'import pandas as pd\n'642        elif target_content['ope_type'] == 3:643            new_code_line = target_content['operation'] + '(' + param_code + ')' + '.' + 'fit_transform(' + data_object +')'644            if target_content['operation'] == 'SimpleImputer':645                package_code = 'from sklearn.impute import SimpleImputer\n'646            elif target_content['operation'] == 'PCA':647                package_code = 'from sklearn.decomposition import PCA\n'648            else:649                package_code = 'from sklearn.preprocessing import ' + target_content['operation'] + '\n'650            # param_code += 'from sklearn.preprocessing import OneHotEncoder\n'651            # param_code += 'from sklearn.preprocessing import LabelEncoder\n'652            # param_code += 'from sklearn.preprocessing import LabelBinarizer\n'653            # param_code += 'from sklearn.preprocessing import StandardScaler\n'654            # param_code += 'from sklearn.preprocessing import MinMaxScaler\n'655            # param_code += 'from sklearn.preprocessing import RobustScaler\n'656            # param_code += 'from sklearn.preprocessing import Normalizer\n'657            #658        elif target_content['ope_type'] == 4:659            if target_content['operation']  == 'boxcox' or target_content['operation']  == 'boxcox1p':660                package_code = 'from scipy.stats import boxcox\n'661                package_code += 'from scipy.special import boxcox1p\n'662                if param_code != '':663                    new_code_line =target_content['operation'] + '(' + data_object + ',' + param_code + ')'664                else:665                    new_code_line =target_content['operation'] + '(' + data_object + ')'666            elif target_content['operation'] == 'l2_normalize':667                prefix = 'tf.nn.'668                if param_code != '':669                    new_code_line =prefix + target_content['operation'] + '(' + data_object + ',' + param_code + ')'670                else:671                    new_code_line =prefix + target_content['operation'] + '(' + data_object + ')'672                package_code = 'import tensorflow as tf'673            else:674                package_code = 'import numpy as np\n'675                alias = 'np'676                if param_code != '':677                    new_code_line = alias + '.' + target_content['operation'] + '(' + data_object + ',' + param_code + ')'678                else:679                    new_code_line = alias + '.' + target_content[680                        'operation'] + '(' + data_object + ')'681        new_code = ''682        code_list = notebook_code.split('\n')683        replaced_line = candidate_code.replace(need_replace,new_code_line)684        for index,line in enumerate(code_list):685            if index != line_number:686                new_code += line687                new_code += '\n'688            else:689                new_code += replaced_line690                new_code += '\n'691        new_code = package_code + new_code692        print('need_replace:', need_replace)693        print('new_code:', new_code_line)694        return new_code695    else:696        return notebook_code697    # else:698    #     return notebook_code699def deleteOperator(notebook_id, change_rank, notebook_root_path='../notebook/'):700    ope_dic = eval(CONFIG.get('operators', 'operations'))701    notebook_path = notebook_root_path + str(notebook_id) + '.ipynb'702    notebook_code = get_code_txt(notebook_path)703    code_list = notebook_code.split('\n')704    for index, line in enumerate(code_list):705        print("\033[0;35;40m" + str(index) + ':' + line + "\033[0m")706    res = get_operator_code(notebook_id, notebook_code, change_rank, ope_dic)707    if res == 'no such operator':708        return res709    candidate_code_list =res[0]710    print(candidate_code_list)711    line_number = candidate_code_list[0][1]712    new_code = ''713    code_list = notebook_code.split('\n')714    for index, line in enumerate(code_list):715        if index != line_number:716            new_code += line717            new_code += '\n'718    return new_code719def get_seq_from_rank(seq, notebook_id, padding=50):720    list = seq.split(',')721    seq_list = []722    ope_dic = eval(CONFIG.get('operators', 'operations'))723    for rank in list:724        sql = 'select operator from operator where notebook_id='+str(notebook_id) + ' and rank='+str(rank)725        cursor, db = create_connection()726        cursor.execute(sql)727        sql_res = cursor.fetchall()728        operator =''729        for row in sql_res:730            operator=row[0]731            break732        one_hot_list = list(np.zeros((27,)))733        one_hot_list[ope_dic[operator]['index']-1] = 1734        seq_list.append(one_hot_list)735    len_seq = len(seq_list)736    for i in range(len_seq,padding):737        seq_list.append(list(np.zeros((27,))))738    seq_list=np.array(seq_list)739    return seq_list740def get_origin_data(notebook_id,notebook_root='../spider/notebook',dataset_root_path='../spider/unzip_dataset'):741    cursor, db = create_connection()742    sql = 'select dataset.dataSourceUrl from dataset,notebook,pair where dataset.id=pair.did and notebook.id=pair.nid and notebook.id=' + str(743        notebook_id)744    cursor.execute(sql)745    sql_res = cursor.fetchall()746    file_list = []747    for row in sql_res:748        temp = "/" + row[0].split('/')[-1] + '.zip'749        file_list.append(temp)750        # break751    try:752        ct = get_code_txt(notebook_root + '/' + str(notebook_id) + '.ipynb')753    except:754        return 'no such notebook'755    code_list = ct.split('\n')756    find_fail = True757    print(file_list)758    for dataset_p in file_list:759        dataset_root_path += dataset_p760        dataset_root_path += '/'761        if not os.path.exists(dataset_root_path):762            return 'no such dataset'763        for code_txt in code_list:764            # print(code_txt)765            if 'read_csv(' in code_txt:766                r_node = ast.parse(code_txt.strip())767                print(code_txt)768                try:769                    print(type(r_node.body[0].value.args[0]))770                    if type(r_node.body[0].value.args[0]).__name__ == 'Str':771                        file_path = r_node.body[0].value.args[0].s772                        file_name = file_path.split('/')[-1]773                    elif type(r_node.body[0].value.args[0]).__name__ == 'Name':774                        file_path = r_node.body[0].value.args[0].id775                        file_name = file_path.split('/')[-1]776                    else:777                        fl = os.listdir(dataset_root_path)778                        file_name = fl[0]779                except:780                    fl = os.listdir(dataset_root_path)781                    file_name = fl[0]782                file_path = dataset_root_path + file_name783                try:784                    origin_df = pd.read_csv(file_path)785                except Exception as e:786                    print(e)787                    find_fail = False788                if find_fail == True:789                    break790                else:791                    continue792            elif 'read_pickle(' in code_txt:793                r_node = ast.parse(code_txt)794                file_path = r_node.body[0].value.args[0].s795                file_name = file_path.split('/')[-1]796                file_path = dataset_root_path + file_name797                origin_df = pd.read_pickle(file_path)798                try:799                    origin_df = pd.read_csv(file_path)800                except Exception as e:801                    print(e)802                    find_fail = False803                if find_fail == True:804                    break805                else:806                    continue807            elif 'read_fwf(' in code_txt:808                r_node = ast.parse(code_txt)809                file_path = r_node.body[0].value.args[0].s810                file_name = file_path.split('/')[-1]811                file_path = dataset_root_path + file_name812                origin_df = pd.read_fwf(file_path)813                try:814                    origin_df = pd.read_csv(file_path)815                except Exception as e:816                    print(e)817                    find_fail = False818                if find_fail == True:819                    break820                else:821                    continue822            elif 'read_clipboard(' in code_txt:823                r_node = ast.parse(code_txt)824                file_path = r_node.body[0].value.args[0].s825                file_name = file_path.split('/')[-1]826                file_path = dataset_root_path + file_name827                origin_df = pd.read_clipboard(file_path)828                try:829                    origin_df = pd.read_csv(file_path)830                except Exception as e:831                    print(e)832                    find_fail = False833                if find_fail == True:834                    break835                else:836                    continue837            # elif 'read_json(' in code_txt:838            #     r_node = ast.parse(code_txt)839            #     for arg in r_node.body[0].value.args:840            #     file_path = r_node.body[0].value.args[0].s841            #     file_name = file_path.split('/')[-1]842            #     file_path = dataset_root_path + file_name843            #     origin_df = pd.read_json(file_path)844            #845            #     try:846            #         origin_df = pd.read_csv(file_path)847            #     except Exception as e:848            #         print(e)849            #         find_fail = False850            #851            #     if find_fail == True:852            #         break853            #     else:854            #         continue855            elif 'json_normalize(' in code_txt:856                r_node = ast.parse(code_txt)857                file_path = r_node.body[0].value.args[0].s858                file_name = file_path.split('/')[-1]859                file_path = dataset_root_path + file_name860                origin_df = pd.json_normalize(file_path)861                try:862                    origin_df = pd.read_csv(file_path)863                except Exception as e:864                    print(e)865                    find_fail = False866                if find_fail == True:867                    break868                else:869                    continue870            elif 'read_html(' in code_txt:871                r_node = ast.parse(code_txt)872                file_path = r_node.body[0].value.args[0].s873                file_name = file_path.split('/')[-1]874                file_path = dataset_root_path + file_name875                origin_df = pd.read_html(file_path)876                try:877                    origin_df = pd.read_csv(file_path)878                except Exception as e:879                    print(e)880                    find_fail = False881                if find_fail == True:882                    break883                else:884                    continue885            elif 'read_hdf(' in code_txt:886                r_node = ast.parse(code_txt)887                file_path = r_node.body[0].value.args[0].s888                file_name = file_path.split('/')[-1]889                file_path = dataset_root_path + file_name890                origin_df = pd.read_hdf(file_path)891                try:892                    origin_df = pd.read_csv(file_path)893                except Exception as e:894                    print(e)895                    find_fail = False896                if find_fail == True:897                    break898                else:899                    continue900            elif 'read_feather(' in code_txt:901                r_node = ast.parse(code_txt)902                file_path = r_node.body[0].value.args[0].s903                file_name = file_path.split('/')[-1]904                file_path = dataset_root_path + file_name905                origin_df = pd.read_feather(file_path)906                try:907                    origin_df = pd.read_csv(file_path)908                except Exception as e:909                    print(e)910                    find_fail = False911                if find_fail == True:912                    break913                else:914                    continue915            elif 'read_parquet(' in code_txt:916                r_node = ast.parse(code_txt)917                file_path = r_node.body[0].value.args[0].s918                file_name = file_path.split('/')[-1]919                file_path = dataset_root_path + file_name920                origin_df = pd.read_parquet(file_path)921                try:922                    origin_df = pd.read_csv(file_path)923                except Exception as e:924                    print(e)925                    find_fail = False926                if find_fail == True:927                    break928                else:929                    continue930            elif 'read_orc(' in code_txt:931                r_node = ast.parse(code_txt)932                file_path = r_node.body[0].value.args[0].s933                file_name = file_path.split('/')[-1]934                file_path = dataset_root_path + file_name935                origin_df = pd.read_orc(file_path)936                try:937                    origin_df = pd.read_csv(file_path)938                except Exception as e:939                    print(e)940                    find_fail = False941                if find_fail == True:942                    break943                else:944                    continue945            elif 'read_sas(' in code_txt:946                r_node = ast.parse(code_txt)947                file_path = r_node.body[0].value.args[0].s948                file_name = file_path.split('/')[-1]949                file_path = dataset_root_path + file_name950                origin_df = pd.read_sas(file_path)951                try:952                    origin_df = pd.read_csv(file_path)953                except Exception as e:954                    print(e)955                    find_fail = False956                if find_fail == True:957                    break958                else:959                    continue960            elif 'read_spss(' in code_txt:961                r_node = ast.parse(code_txt)962                file_path = r_node.body[0].value.args[0].s963                file_name = file_path.split('/')[-1]964                file_path = dataset_root_path + file_name965                origin_df = pd.read_spss(file_path)966                try:967                    origin_df = pd.read_csv(file_path)968                except Exception as e:969                    print(e)970                    find_fail = False971                if find_fail == True:972                    break973                else:974                    continue975            elif 'read_sql_table(' in code_txt:976                r_node = ast.parse(code_txt)977                file_path = r_node.body[0].value.args[0].s978                file_name = file_path.split('/')[-1]979                file_path = dataset_root_path + file_name980                origin_df = pd.read_sql_table(file_path)981                try:982                    origin_df = pd.read_csv(file_path)983                except Exception as e:984                    print(e)985                    find_fail = False986                if find_fail == True:987                    break988                else:989                    continue990            # elif 'read_sql_query(' in code_txt:991            #     r_node = ast.parse(code_txt)992            #     file_path = r_node.body[0].value.args[0].s993            #     file_name = file_path.split('/')[-1]994            #     file_path = dataset_root_path + file_name995            #     origin_df = pd.read_sql_query(file_path)996            #997            #     try:998            #         origin_df = pd.read_csv(file_path)999            #     except Exception as e:1000            #         print(e)1001            #         find_fail = False1002            #1003            #     if find_fail == True:1004            #         break1005            #     else:1006            #         continue1007            elif 'read_gbq(' in code_txt:1008                r_node = ast.parse(code_txt)1009                file_path = r_node.body[0].value.args[0].s1010                file_name = file_path.split('/')[-1]1011                file_path = dataset_root_path + file_name1012                origin_df = pd.read_gbq(file_path)1013                try:1014                    origin_df = pd.read_csv(file_path)1015                except Exception as e:1016                    print(e)1017                    find_fail = False1018                if find_fail == True:1019                    break1020                else:1021                    continue1022            elif 'read_stata(' in code_txt:1023                r_node = ast.parse(code_txt)1024                file_path = r_node.body[0].value.args[0].s1025                file_name = file_path.split('/')[-1]1026                file_path = dataset_root_path + file_name1027                origin_df = pd.read_stata(file_path)1028                try:1029                    origin_df = pd.read_csv(file_path)1030                except Exception as e:1031                    print(e)1032                    find_fail = False1033                if find_fail == True:1034                    break1035                else:1036                    continue1037            elif 'open(' in code_txt:1038                index = code_txt.find('open(')1039                if index != 0:1040                    if code_txt[index-1] == '.':1041                        continue1042                try:1043                    r_node = ast.parse(code_txt.strip())1044                except:1045                    continue1046                print(code_txt)1047                try:1048                    print(type(r_node.body[0].value.args[0]))1049                    if type(r_node.body[0].value.args[0]).__name__ == 'Str':1050                        file_path = r_node.body[0].value.args[0].s1051                        file_name = file_path.split('/')[-1]1052                    elif type(r_node.body[0].value.args[0]).__name__ == 'Name':1053                        file_path = r_node.body[0].value.args[0].id1054                        file_name = file_path.split('/')[-1]1055                    else:1056                        fl = os.listdir(dataset_root_path)1057                        file_name = fl[0]1058                except:1059                    fl = os.listdir(dataset_root_path)1060                    file_name = fl[0]1061                file_path = dataset_root_path + file_name1062                if '.csv' in file_name:1063                    try:1064                        origin_df = pd.read_csv(file_path)1065                    except Exception as e:1066                        print(e)1067                        find_fail = False1068                    if find_fail == True:1069                        break1070                    else:1071                        continue1072            else:1073                # print('no such df')1074                origin_df = 'no such df'1075        if type(origin_df).__name__ == 'str':1076            print('no origin df')1077            return 'no origin df'1078        else:1079            dtypes = origin_df.dtypes1080            origin_num_df_list = []1081            origin_cat_df_list = []1082            origin_column_info = {}1083            for i in range(len(dtypes)):1084                if str(dtypes.values[i]) == 'int64' or str(dtypes.values[i]) == 'float64' or str(1085                        dtypes.values[i]) == 'int32' \1086                        or str(dtypes.values[i]) == 'float32' or str(dtypes.values[i]) == 'int' or str(1087                    dtypes.values[i]) == 'float':1088                    origin_num_df_list.append(dtypes.index[i])1089                elif str(dtypes.values[i]) == 'str' or str(dtypes.values[i]) == 'Category':1090                    origin_cat_df_list.append(dtypes.index[i])1091                origin_column_info[i] = {}1092                origin_column_info[i]['col_name'] = dtypes.index[i]1093                origin_column_info[i]['dtype'] = str(dtypes.values[i])1094                origin_column_info[i]['content'] = origin_df[dtypes.index[i]].values1095                origin_column_info[i]['length'] = len(origin_df[dtypes.index[i]].values)1096                origin_column_info[i]['null_ratio'] = origin_df[dtypes.index[i]].isnull().sum() / len(1097                    origin_df[dtypes.index[i]].values)1098                origin_column_info[i]['ctype'] = 1 if str(dtypes.values[i]) == 'int64' or str(1099                    dtypes.values[i]) == 'float64' or str(dtypes.values[i]) == 'int32' \1100                                                      or str(dtypes.values[i]) == 'float32' or str(1101                    dtypes.values[i]) == 'int' or str(dtypes.values[i]) == 'float' else 21102                origin_column_info[i]['nunique'] = origin_df[dtypes.index[i]].nunique()1103                origin_column_info[i]['nunique_ratio'] = origin_df[dtypes.index[i]].nunique() / len(1104                    origin_df[dtypes.index[i]].values)1105            # pprint.pprint(column_info[0])1106            for column in origin_column_info:1107                if origin_column_info[column]['ctype'] == 1:  # å¦ææ¯æ°åå1108                    origin_column_info[column]['mean'] = origin_df[origin_column_info[column]['col_name']].describe()[1109                        'mean']1110                    origin_column_info[column]['std'] = origin_df[origin_column_info[column]['col_name']].describe()[1111                        'std']1112                    origin_column_info[column]['min'] = origin_df[origin_column_info[column]['col_name']].describe()[1113                        'min']1114                    origin_column_info[column]['25%'] = origin_df[origin_column_info[column]['col_name']].describe()[1115                        '25%']1116                    origin_column_info[column]['50%'] = origin_df[origin_column_info[column]['col_name']].describe()[1117                        '50%']1118                    origin_column_info[column]['75%'] = origin_df[origin_column_info[column]['col_name']].describe()[1119                        '75%']1120                    origin_column_info[column]['max'] = origin_df[origin_column_info[column]['col_name']].describe()[1121                        'max']1122                    origin_column_info[column]['median'] = origin_df[origin_column_info[column]['col_name']].median()1123                    if len(origin_df[origin_column_info[column]['col_name']].mode()) == 0:1124                        origin_column_info[column]['mode'] = 'NAN'1125                    else:1126                        origin_column_info[column]['mode'] = origin_df[origin_column_info[column]['col_name']].mode().iloc[0]1127                    origin_column_info[column]['mode_ratio'] = \1128                        origin_df[origin_column_info[column]['col_name']].astype('category').describe().iloc[3] / \1129                        origin_column_info[column][1130                            'length']1131                    origin_column_info[column]['sum'] = origin_df[origin_column_info[column]['col_name']].sum()1132                    origin_column_info[column]['skew'] = origin_df[origin_column_info[column]['col_name']].skew()1133                    origin_column_info[column]['kurt'] = origin_df[origin_column_info[column]['col_name']].kurt()1134                elif origin_column_info[column]['ctype'] == 2:  # categoryå1135                    origin_column_info[column]['nunique'] = origin_df[origin_column_info[column]['col_name']].nunique()1136                    origin_column_info[column]['unique'] = origin_df[origin_column_info[column]['col_name']].unique()1137                    for item in origin_df[origin_column_info[column]['col_name']].unique():1138                        # print(item)1139                        temp = 01140                        for va in origin_df[origin_column_info[column]['col_name']].values:1141                            if va == item:1142                                temp += 11143                        origin_column_info[column][item] = temp1144        # print('origin_column_info')1145        # pprint.pprint(origin_column_info)1146        break1147    return origin_column_info1148def sampling(action, notebook_id, result_id, notebook_root='../spider/notebook',dataset_root='../unzip_dataset',T=True):1149    """1150    :param s: s[0] = dataframe input to model, s[1] = sequence tensor, s[2] = model_id1151    :param action: action = operator_name1152    :return: r = [-1:1], s1 = new state1153    """1154    cursor, db = create_connection()1155    # walk_logs = np.load('../walklogs/' + str(notebook_id) + '.npy', allow_pickle=True).item()1156    sql = 'select content,sequence,model_type from result where id='+str(result_id)1157    cursor.execute(sql)1158    sql_res = cursor.fetchall()1159    model_id_dic=np.load('./model_dic.npy',allow_pickle=True).item()1160    seq = ''1161    score = 01162    model_type = ''1163    for row in sql_res:1164        seq = row[1]1165        score = row[0]1166        model_type = row[2]1167    print(model_type)1168    if model_type not in model_id_dic:1169        print('useless result')1170        return 'useless result'1171    #####get input data of model #######1172    num_df_list = []1173    cat_df_list = []1174    column_info = {}1175    file_list = os.listdir('../predf/'+ str(notebook_id) + '/')1176    if str(result_id) + '.csv' in file_list:1177        s_df = pd.read_csv('../predf/'+ str(notebook_id) + '/' + str(result_id) + '.csv')1178        dtypes = s_df.dtypes1179        for i in range(len(dtypes)):1180            if str(dtypes.values[i]) == 'int64' or str(dtypes.values[i]) == 'float64' or str(dtypes.values[i]) == 'int32' \1181                    or str(dtypes.values[i]) == 'float32' or str(dtypes.values[i]) == 'int' or str(dtypes.values[i]) == 'float':1182                num_df_list.append(dtypes.index[i])1183            elif dtypes.values[i] == 'str' or dtypes.values[i] == 'Category':1184                cat_df_list.append(dtypes.index[i])1185            column_info[i] = {}1186            column_info[i]['col_name'] = dtypes.index[i]1187            column_info[i]['dtype'] = str(dtypes.values[i])1188            column_info[i]['content'] = s_df[dtypes.index[i]].values1189            column_info[i]['length'] = len(s_df[dtypes.index[i]].values)1190            column_info[i]['null_ratio'] = s_df[dtypes.index[i]].isnull().sum()/len(s_df[dtypes.index[i]].values)1191            column_info[i]['ctype'] = 1 if str(dtypes.values[i]) == 'int64' or str(dtypes.values[i]) == 'float64' or str(dtypes.values[i]) == 'int32' \1192                    or str(dtypes.values[i]) == 'float32' or str(dtypes.values[i]) == 'int' or str(dtypes.values[i]) == 'float' else 21193            column_info[i]['nunique'] = s_df[dtypes.index[i]].nunique()1194            column_info[i]['nunique_ratio'] = s_df[dtypes.index[i]].nunique()/len(s_df[dtypes.index[i]].values)1195        # pprint.pprint(column_info[0])1196        for column in column_info:1197            column_feature = []1198            if column_info[column]['ctype'] == 1: #å¦ææ¯æ°åå1199                column_info[column]['mean'] = s_df[column_info[column]['col_name']].describe()['mean']1200                column_info[column]['std'] = s_df[column_info[column]['col_name']].describe()['std']1201                column_info[column]['min'] = s_df[column_info[column]['col_name']].describe()['min']1202                column_info[column]['25%'] = s_df[column_info[column]['col_name']].describe()['25%']1203                column_info[column]['50%'] = s_df[column_info[column]['col_name']].describe()['50%']1204                column_info[column]['75%'] = s_df[column_info[column]['col_name']].describe()['75%']1205                column_info[column]['max'] = s_df[column_info[column]['col_name']].describe()['max']1206                column_info[column]['median'] = s_df[column_info[column]['col_name']].median()1207                column_info[column]['mode'] = s_df[column_info[column]['col_name']].mode().iloc[0]1208                column_info[column]['mode_ratio'] = s_df[column_info[column]['col_name']].astype('category').describe().iloc[3]/column_info[column]['length']1209                column_info[column]['sum'] = s_df[column_info[column]['col_name']].sum()1210                column_info[column]['skew'] = s_df[column_info[column]['col_name']].skew()1211                column_info[column]['kurt'] = s_df[column_info[column]['col_name']].kurt()1212            elif column_info[column]['ctype']==2: #categoryå1213                column_info[i]['mean'] = 01214                column_info[i]['std'] = 01215                column_info[i]['min'] = 01216                column_info[i]['25%'] = 01217                column_info[i]['50%'] = 01218                column_info[i]['75%'] = 01219                column_info[i]['max'] = 01220                column_info[i]['median'] = 01221                column_info[i]['mode'] = 01222                column_info[i]['mode_ratio'] = 01223                column_info[i]['sum'] = 01224                column_info[i]['skew'] = 01225                column_info[i]['kurt'] = 01226                # column_info[column]['unique'] = s_df[column_info[column]['col_name']].unique()1227                # for item in s_df[column_info[column]['col_name']].unique():1228                #     temp1 = [x for i, x in enumerate(s_df[column_info[column]['col_name']]) if1229                #              s_df[column_info[column]['col_name']].iat[0, i] == item]1230                #     column_info[column][item] = len(temp1)1231            for key in column_info[column]:1232                if key != 'col_name':1233                    column_feature[key].append(column_info[column][key])1234            # break1235    elif str(result_id) + '.npy' in file_list:1236        inp_data = np.load('../predf/' + str(notebook_id) + '/' + str(result_id) + '.npy').T.tolist()1237        for i,col in enumerate(inp_data):1238            s_s = pd.Series(col)1239            if str(s_s.dtypes) == 'int64' or str(s_s.dtypes) == 'float64' or str(s_s.dtypes) == 'int32' \1240                    or str(s_s.dtypes) == 'float32' or str(s_s.dtypes) == 'int' or str(s_s.dtypes) == 'float':1241                num_df_list.append('unknown_'+str(i))1242            elif str(s_s.dtypes) == 'int64' == 'str' or str(s_s.dtypes)  == 'Category':1243                cat_df_list.append('unknown_'+str(i))1244            column_info[i] = {}1245            column_info[i]['col_name'] = 'unknown_'+str(i)1246            column_info[i]['dtype'] = str(s_s.dtypes)1247            column_info[i]['content'] = s_s.values1248            column_info[i]['length'] = len(s_s.values)1249            column_info[i]['null_ratio'] = s_s.isnull().sum()/len(s_s.values)1250            column_info[i]['ctype'] = 1 if str(s_s.dtypes) == 'int64' or str(s_s.dtypes) == 'float64' or str(s_s.dtypes) == 'int32' \1251                    or str(s_s.dtypes) == 'float32' or str(s_s.dtypes) == 'int' or str(s_s.dtypes) == 'float' else 21252            column_info[i]['nunique'] = s_s.nunique()1253            column_info[i]['nunique_ratio'] = s_s.nunique()/len(s_s.values)1254            if column_info[i]['ctype'] == 1: #å¦ææ¯æ°åå1255                column_info[i]['mean'] = s_s.describe()['mean']1256                column_info[i]['std'] = s_s.describe()['std']1257                column_info[i]['min'] = s_s.describe()['min']1258                column_info[i]['25%'] = s_s.describe()['25%']1259                column_info[i]['50%'] = s_s.describe()['50%']1260                column_info[i]['75%'] = s_s.describe()['75%']1261                column_info[i]['max'] = s_s.describe()['max']1262                column_info[i]['median'] = s_s.median()1263                column_info[i]['mode'] = s_s.mode().iloc[0]1264                column_info[i]['mode_ratio'] = s_s.astype('category').describe().iloc[3]/column_info[i]['length']1265                column_info[i]['sum'] = s_s.sum()1266                column_info[i]['skew'] = s_s.skew()1267                column_info[i]['kurt'] = s_s.kurt()1268            elif column_info[i]['ctype']==2: #categoryå1269                column_info[i]['mean'] = 01270                column_info[i]['std'] = 01271                column_info[i]['min'] = 01272                column_info[i]['25%'] = 01273                column_info[i]['50%'] = 01274                column_info[i]['75%'] = 01275                column_info[i]['max'] = 01276                column_info[i]['median'] = 01277                column_info[i]['mode'] = 01278                column_info[i]['mode_ratio'] = 01279                column_info[i]['sum'] = 01280                column_info[i]['skew'] = 01281                column_info[i]['kurt'] = 01282                # for item in s_s.unique():1283                #     temp1 = [x for i, x in enumerate(s_s) if s_s.iat[0, i] == item]1284                #     column_info[i][item] = len(temp1)1285    ####load origin dataset#####1286    # print(dtypes.index)1287    # origin_code = get_code_txt(notebook_root + '/' + notebook_id + '.ipynb')1288    # if action[0] == 'Add':1289    #     data_object = get_data_object(result_id, action[1])1290    #     target_content = {1291    #         'operation': action[2],1292    #         'ope_type': 1,1293    #         'parameters': [],1294    #         'data_object': 'train',1295    #     }1296    #     new_code = addOperator(notebook_id, action[1], target_content)1297    # elif action[0] == 'Update':1298    #     data_object = get_data_object(result_id, action[1])1299    #     target_content = {1300    #         'operation': action[2],1301    #         'ope_type': 1,1302    #         'parameters': [],1303    #         'data_object': 'train',1304    #     }1305    #     new_code = changeOperator(notebook_id, action[1], target_content)1306    # elif action[0] == 'Delete':1307    #     new_code = deleteOperator(notebook_id, action[1])1308    #1309    # run_result = changed_running()1310    #1311def stat_colnum_and_uniques(ip,notebook_root='../spider/notebook',dataset_root_path='../spider/unzip_dataset'):1312    in_result = []1313    cursor, db = create_connection()1314    sql = 'select distinct notebook_id from result'1315    cursor.execute(sql)1316    sql_res = cursor.fetchall()1317    for row in sql_res:1318        in_result.append(int(row[0]))1319    sql = "select distinct pair.nid,pair.did from pair,dataset where pair.did=dataset.id and dataset.server_ip='" + ip +"' and dataset.isdownload=1"1320    print(sql)1321    cursor.execute(sql)1322    sql_res = cursor.fetchall()1323    count = 01324    col_num_sum = 01325    cat_sum = 01326    has_print = []1327    max_col_num = 01328    max_unique_num = 01329    sum_unique_ratio=01330    max_unique_ratio = 01331    max_length = 01332    sum_length = 01333    for row in sql_res:1334        # print(count)1335        if row[1] not in has_print:1336            # print('id:',row[1])1337            has_print.append(row[1])1338        else:1339            continue1340        # notebook_id=int(row[0])1341        file_list = os.listdir('../origindf/')1342        #1343        # if str(row[1])+'.npy' in file_list:1344        #     # print("already in")1345        #     continue1346        # if notebook_id not in in_result:1347        #     continue1348        if str(row[1])+'.npy' in file_list:1349            print(count)1350            count += 11351            origin_column_info = np.load('../origindf/' + str(row[1])+'.npy',allow_pickle=True).item()1352        else:1353            continue1354        # if origin_column_info == 'no origin df':1355        #     continue1356        # np.save('../origindf/' + str(row[1])+'.npy', origin_column_info)1357        # print(origin_column_info)1358        if len(origin_column_info) > max_col_num:1359            max_col_num = len(origin_column_info)1360        for col in origin_column_info:1361            if origin_column_info[col]['ctype'] == 2:1362                print('nunique:',origin_column_info[col]['nunique'])1363                cat_sum += origin_column_info[col]['nunique']1364                if origin_column_info[col]['nunique'] > max_unique_num:1365                    max_unique_num = origin_column_info[col]['nunique']1366        col_num_sum += len(origin_column_info)1367        sum_unique_ratio += origin_column_info[col]['nunique_ratio']1368        if origin_column_info[col]['nunique_ratio'] > max_unique_ratio:1369            max_unique_ratio = origin_column_info[col]['nunique_ratio']1370        sum_length += origin_column_info[col]['length']1371        if origin_column_info[col]['length'] > max_length:1372            max_length = origin_column_info[col]['length']1373        # cat_num_sum += col_num_sum1374    # print('count:', count)1375    if count == 0:1376        return1377    else:1378        print('mean_col_num:',col_num_sum/count)1379        print('max_col_num:', max_col_num)1380        print('mean_uniques:', cat_sum/col_num_sum)1381        print('max_unique_num:', max_unique_num)1382        print('mean_uniques_ratio:', sum_unique_ratio / col_num_sum)1383        print('max_unique_ratio:', max_unique_ratio)1384        print('mean_length:', sum_length / col_num_sum)1385        print('max_length:', max_length)1386    print(has_print)1387def save_origin_df(ip,notebook_root='../spider/notebook',dataset_root_path='../spider/unzip_dataset'):1388    in_result = []1389    cursor, db = create_connection()1390    sql = 'select distinct notebook_id from result'1391    cursor.execute(sql)1392    sql_res = cursor.fetchall()1393    for row in sql_res:1394        in_result.append(int(row[0]))1395    sql = "select distinct pair.nid,pair.did from pair,dataset where pair.did=dataset.id and dataset.server_ip='" + ip + "' and dataset.isdownload=1"1396    cursor.execute(sql)1397    sql_res = cursor.fetchall()1398    has_checked = []1399    nod = 01400    non = 01401    nsd = 01402    for row in sql_res:1403        file_list = os.listdir('../origindf/')1404        if row[1] in has_checked:1405            continue1406        has_checked.append(row[1])1407        if str(row[1])+'.npy' in file_list:1408            # print("already in")1409            continue1410        print('dataset_id:', row[1])1411        notebook_id= row[0]1412        origin_column_info = get_origin_data(notebook_id,notebook_root,dataset_root_path)1413        if origin_column_info == 'no origin df':1414            print('no origin df')1415            nod += 11416            continue1417        if origin_column_info == 'no such notebook':1418            non += 11419            print('no such notebook')1420            continue1421        if origin_column_info == 'no such dataset':1422            nsd += 11423            print('no such dataset')1424            continue1425        np.save('../origindf/' + str(row[1])+'.npy', origin_column_info)1426    print('nod:',nod)1427    print('non:',non)1428def get_model_dic():1429    cursor, db = create_connection()1430    sql = 'select distinct model_type, count(distinct notebook_id) from result group by model_type'1431    cursor.execute(sql)1432    sql_res = cursor.fetchall()1433    model_dic = {}1434    id=11435    for row in sql_res:1436        if row[1]<10 or row[0]=='str' or row[0]=='unknown' or row[0]=='list' or row[0]=='Pipeline' or row[0]=='cross_val_predict':1437            continue1438        model_dic[row[0]] = id1439        id+=11440    pprint.pprint(model_dic)1441    np.save('./model_dic', model_dic)1442if __name__ == '__main__':1443    dataset_name = ''1444    cursor, db = create_connection()1445    sql = 'select dataSourceUrl from pair,dataset where pair.did=dataset.id and pair.nid=7835272'1446    cursor.execute(sql)1447    sql_res = cursor.fetchall()1448    for row in sql_res:1449        if row[0] == 'None' or row[0] == None:1450            continue1451        dataset_name = row[0].split('/')[-1]1452        break1453    print(dataset_name)1454    # get_model_dic()1455    # sampling('Add',103681,173671,notebook_root='../notebook')1456    #1457    # ip = '10.77.70.128'1458    # if ip != '39.99.150.216':1459    #     notebook_root = '../notebook'1460    #     dataset_root = '../unzip_dataset'1461    #     save_origin_df(ip,notebook_root=notebook_root,dataset_root_path=dataset_root)1462    # else:1463    #     save_origin_df(ip)1464    # notebook_id = 168691465    # target_content = {1466    #     'operation': 'boxcox1p',1467    #     'ope_type': 4,1468    #     'parameters': [],1469    #     'data_object': 'y_test[\'floor\']'1470    # }1471    # # new_code = changeOperator(notebook_id,4,target_content)1472    # new_code = deleteOperator(notebook_id, 12,notebook_root_path='../spider/notebook/')1473    # code_list = new_code.split('\n')1474    # for index,line in enumerate(code_list):1475    #     print("\033[0;33;40m" + str(index)+':' + line + "\033[0m")...server.py
Source:server.py  
1# Flask libraries2import logging3from random import randint4from flask import Flask, render_template, session, request, Response5from flask_ask import Ask, statement, question6import jsonpickle7# Image libraries8import numpy as np9from PIL import Image10import cv211# Twitter12import twitter13# Other libraries14import datetime, time15import threading16import torch.multiprocessing as mp17import torch18import string19from random import *20import os21import atexit22# Global Variables23home_path = os.path.expanduser("~")24frame = None25image = None26username = None27greeting_nums = 2428bye_nums = 629# Twitter setup30consumer_key = 'iTl0HLBQxe8V4JksVXwu8Xwus'31consumer_secret = 'o7I8GEd8JesXN2m27bDpmNtT4ZewvNpJ9axGZCiNQPNHmTHFlG'32access_token_key = '974666982678294529-0Ho7jjlHkjVblXZeahFuBtueSZ2LO6n'33access_token_secret = 'IxvugPcrPmjoiPlA78h1zWToctLoR3dr0AXxsTCCU3Knd'34# Helper functions35def format_filename(s):36    valid_chars = "-_.() {}{}".format(string.ascii_letters, string.digits)37    filename = ''.join(c for c in s if c in valid_chars)38    filename = filename.replace(' ','_')39    return filename40def voice_mod(s):41    if isinstance(s, str) or isinstance(s, unicode):42        return "<speak><prosody pitch='+33.3%'>" + s + '</prosody></speak>'43app = Flask(__name__)44ask = Ask(app, "/")45logging.getLogger("flask_ask").setLevel(logging.DEBUG)46app.secret_key = 'ravioli ravioli give me the formuoli'47@app.route('/updateImage', methods=['POST'])48def image_update():49    r = request50    nparr = np.fromstring(r.data, np.uint8)51    global frame52    frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)53    # build a response dict to send back to client54    response = {'message': 'image received. size={}x{}'.format(frame.shape[1], frame.shape[0])}55    # encode response using jsonpickle56    response_pickled = jsonpickle.encode(response)57    return Response(response=response_pickled, status=200, mimetype="application/json")58@ask.launch59def welcome():60    msg = voice_mod(render_template('welcome'))61    reprompt = voice_mod(render_template('prompt'))62    return question(msg).reprompt(reprompt)63@ask.intent("SelfieIntent")64def selfie():65    msg = None66    global frame, image67    if frame is not None:68        image = frame69        msg = render_template('selfie_ok')70    else:71        msg = render_template('selfie_fail')72    msg = voice_mod(msg)73    return question(msg)74@ask.intent("UsernameIntent", mapping={'name': 'Name'})75def username(name):76    global username77    if isinstance(name, unicode):78        username = name79        msg = render_template('username', name=name)80    else:81        msg = render_template('username_fail')82    msg = voice_mod(msg)83    return question(msg)84@ask.intent("GreetingIntent")85def greeting():86    global username, greeting_nums87    name = username88    if name is None:89        name = ''90    msg = render_template('greeting_'+ str(randint(1, greeting_nums)), name=name)91    msg = voice_mod(msg)92    return question(msg)93@ask.intent("ExitIntent")94def bye():95    global username, bye_nums96    name = username97    if name is None:98        name = ''99    msg = render_template('bye_'+ str(randint(1, bye_nums)), name=name)100    msg = voice_mod(msg)101    reprompt = render_template('bye_reprompt')102    reprompt = voice_mod(reprompt)103    username = None104    return question(msg).reprompt(reprompt)105@ask.intent("ShowIntent", mapping={'name': 'Name', 'previous': 'Previous'})106def showImage(name, previous):107    global image108    msg = None109    print("Name: {}".format(name))110    print("Previous: {}".format(previous))111    # Show previous image112    if isinstance(previous, unicode):113        if (previous.lower() in ['last', 'previous', 'that']) and (image is not None):114            Image.fromarray(image).show()115            msg = render_template('show_image')116        else:117            msg = render_template('show_fail')118    # Find image in home folder119    elif isinstance(name, unicode):120        filt_name = format_filename(str(name).lower())121        imgPath = home_path + '/' + filt_name + ".png"122        if os.path.isfile(imgPath):123            Image.open(imgPath).show()124            msg = render_template('show_image')125        else:126            msg = render_template('find_fail')127    # Couldn't match anything128    else:129        msg = render_template('find_fail')130    msg = voice_mod(msg)131    return question(msg)132@ask.intent("NameIntent", mapping={'name': 'Name'})133def nameImage(name):134    global image135    msg = None136    # If fibi has already taken a selfie137    if image is not None:138        print(name)139        print(type(name))140        print('Received name: {}'.format(name))141        # If name is provided142        if isinstance(name, unicode):143            name = str(name).lower()144            filt_name = format_filename(name)145            print('Filtered name: {}'.format(filt_name))146            # If image with that filename already exists147            if os.path.isfile(home_path + '/' + filt_name + ".png"):148                msg = render_template('name_fail', name=name)149            # Else, try saving under that name150            else:151                try:152                    Image.fromarray(image).save(home_path + '/' + filt_name + ".png")153                    msg = render_template('name_image', name=name)154                except:155                    msg = render_template('name_fail', name=name)156        # Else, try another name157        else:158            msg = render_template('name_no')159    # Else, prompt user to take image160    else:161        msg = render_template('name_none')162    msg = voice_mod(msg)163    return question(msg)164#165# @ask.intent("TwitterIntent", mapping={'name': 'Name', 'previous': 'Previous'})166@ask.intent("TwitterIntent", mapping={'name': 'Name', 'previous': 'Previous'})167def tweetImage(name, previous):168    global image, consumer_key, consumer_secret, access_token_key, access_token_secret169    msg = None170    status = 'Posted by Fibi!'171    twitterApi = twitter.Api(consumer_key=consumer_key,172                             consumer_secret=consumer_secret,173                             access_token_key=access_token_key,174                             access_token_secret=access_token_secret)175    print('Received name: {}'.format(name))176    print(type(name))177    print('Received previous: {}'.format(previous))178    print(type(previous))179    # Tweet last image180    if isinstance(previous, str) and (previous.lower() in ['last', 'previous', 'that']):181        if image is not None:182            try:183                # Save last image in a temporary file184                print('Attempting to tweet...')185                imgPath = home_path + '/latestImage.png'186                print('Tweet successful')187                Image.fromarray(image).save(imgPath)188                # Open and tweet last image189                twitterApi.PostUpdate(status, media=imgPath)190                msg = render_template('tweet_ok')191            except:192                msg = render_template('tweet_fail')193        else:194            msg = render_template('find_fail')195    # Tweet specified image in home folder196    # elif isinstance(name, unicode):197    elif isinstance(name, unicode):198        name = str(name).lower()199        filt_name = format_filename(name)200        print('Filtered name: {}'.format(filt_name))201        imgPath = home_path + '/' + filt_name + ".png"202        if os.path.isfile(imgPath):203            try:204                # Open and tweet image from path205                print('Attempting to tweet...')206                f = open(imgPath, 'rb')207                twitterApi.PostUpdate(status, media=f)208                print('Tweet successful')209                msg = render_template('tweet_ok')210            except Exception as e:211                print(e)212                msg = render_template('tweet_fail')213        else:214            msg = render_template('find_fail')215    # Failed to find image with that name216    else:217        msg = render_template('find_fail')218    msg = voice_mod(msg)219    return question(msg)220@ask.intent("AMAZON.YesIntent")221def yes():222    msg = render_template('yes')223    msg = voice_mod(msg)224    return question(msg)225@ask.intent("AMAZON.NoIntent")226def no():227    msg = render_template('no')228    msg = voice_mod(msg)229    return question(msg)230@ask.intent("AMAZON.StopIntent")231def stop():232    msg = render_template('stop')233    msg = voice_mod(msg)234    return statement(msg)235@ask.intent("AMAZON.CancelIntent")236def cancel():237    msg = render_template('stop')238    msg = voice_mod(msg)239    return statement(msg)240@ask.intent("AMAZON.HelpIntent")241def  help():242    msg = render_template('help')243    msg = voice_mod(msg)244    return question(msg)245@ask.intent("AboutIntent")246def  about():247    msg = render_template('about')248    reprompt = render_template('about_reprompt')249    msg = voice_mod(msg)250    reprompt = voice_mod(reprompt)251    return question(msg)252    # return question(msg).reprompt(reprompt)...prefixtree.py
Source:prefixtree.py  
1FIND_SUC = 12FIND_FAIL = 03FIND_PART = 24class Node:5    def __init__(self, value=None, key=None):6        self.value = value7        self.key = key8        self.childs = {}9    def Append(self, key, value, depth=0):10        self.childs[key[depth]] = node = Node()#BAD BAD BAD!!!1111111        if depth == len(key) - 1:12            node.key = key13            node.value = value14            return True15        node.Append(key, value, depth + 1)16        17    def __str__(self, additional=''):18        res = '%s:%s' % (str(self.key), str(self.value))19        for k, v in self.childs.iteritems():20            res += '\n%s%s->%s' % (additional, k, v.__str__(additional + '   '))21        return res22    23    def Add(self, key, value):24       25       stack, res = self.Find(key)26       if res == FIND_SUC:27           stack[-1].value = value28           return29        30       depth = len(stack) - 131       if depth == len(key):32           stack[-1].value = value33           stack[-1].key = key34           return35       stack[-1].Append(key, value, len(stack) - 1)36    def Find(self, key, stack=None): 37        if not stack: stack = []38        stack.append(self)39        depth = len(stack) - 140        if self.key == key:41            return stack, FIND_SUC42        if (depth >= len(key)):43            return stack, FIND_FAIL44        45        if key[depth] in self.childs:46            return self.childs[key[depth]].Find(key, stack)47        if depth == 0:48            return stack, FIND_FAIL49        50        return stack, FIND_PART51'''52    def Add(self, key, value, depth=0):53        if depth >= len(key): return False54        stack, res = self.Find(key, depth)55        if res == FIND_SUC: return False56        node = stack[-1]57        node.Append(key, value, len(stack) - 1)58    def Find(self, key, depth=0):59        if self.key == key:60            return self, depth, FIND_SUC        61        if (depth >= len(key)):62            return self, depth, FIND_FAIL63        64        if key[depth] in self.childs:65            return self.childs[key[depth]].Find(key, depth + 1)66        else:67            return self, depth, FIND_PART68'''69class Tree:70    def __init__(self):71        self.root = Node()72    def __str__(self):73        return str(self.root)74    def Find(self, key):75        #s res = 76        return self.root.Find(key)77    def Add(self, key, value):78        self.root.Add(key, value)79'''80tree = Tree()81tree.Add('124444', 2)82tree.Add('126', 2)83tree.Add('g2', 2)84tree.Add('g', 2)85print tree...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation test minutes FREE!!
