Best Python code snippet using lisa_python
domain_classify.py
Source:domain_classify.py  
# -*- coding: utf-8 -*-
"""Keyword-based identity ("domain") classification of Twitter users.

NOTE: this module targets Python 2 (byte-string ``decode``/``encode``
round trips). The keyword lists (``lawyerw``, ``adminw``, ``mediaw``,
``businessw``, ``govw``, ``mediaworkerw``, ``universityw``), ``inner_city``,
``ACTIVE_COUNT`` and ``load_scws`` are not defined here; presumably they come
from the ``global_utils_do`` star import -- confirm against that module.
"""

import os
import re
import scws
import sys
import csv
import opencc
from global_utils_do import *

sys.path.append('../../trans')
from trans import trans, traditional2simplified

# cc = opencc.OpenCC('t2s', opencc_path='/usr/bin/opencc')
s = load_scws()


def classify_by_biostring(bio_string):
    """Classify a user from the keywords contained in their bio string.

    Each category's weight is the number of its keywords that occur as a
    substring of ``bio_string``. The category with the strictly highest
    weight wins; on a tie the category listed first below is kept.

    Args:
        bio_string: UTF-8 encoded text (username and description joined).

    Returns:
        One of 'business', 'admin', 'media', 'lawyer', 'politician',
        'mediaworker', 'university', or 'other' when no keyword matches.
    """
    # Round trip through unicode exactly as before; the traditional ->
    # simplified Chinese conversion (opencc) is currently disabled.
    kwdlist = bio_string.decode('utf-8').encode('utf-8')

    # Order matters: it is the tie-breaking priority of the original
    # if-chain (a later category must score strictly higher to win).
    categories = (
        ('business', businessw),        # business people
        ('admin', adminw),              # organizations
        ('media', mediaw),              # media outlets
        ('lawyer', lawyerw),            # lawyers
        ('politician', govw),           # government officials
        ('mediaworker', mediaworkerw),  # media professionals
        ('university', universityw),    # universities
    )

    label = 'other'
    max_weight = 0
    for category, keywords in categories:
        weight = sum(1 for keyword in keywords if keyword in kwdlist)
        if weight > max_weight:
            max_weight = weight
            # The government branch used to assign to a stray ``gov``
            # variable, so 'politician' could never be returned.
            label = category

    return label


def classify_inner_outer(location):
    """Return 1 if ``location`` mentions any city in ``inner_city``, else 0.

    Args:
        location: UTF-8 encoded location text from the user's profile.
    """
    # Traditional -> simplified conversion is currently disabled; the
    # decode/encode round trip is kept from the original implementation.
    new_location = location.decode('utf-8').encode('utf-8')
    for city in inner_city:
        if city in new_location:
            return 1
    return 0


def domain_main(user_data):
    """Main entry point: assign an identity label to every Twitter user.

    Args:
        user_data: dict of the form
            {uid: {'description': ..., 'username': ..., 'location': ...,
                   'number_of_text': ...}, ...}
            description: the ``description`` field of the user's profile.
                Note: part of the content may be English and needs to be
                converted to Chinese.
            username: the ``username`` field of the user's profile.
            location: the ``location`` field of the user's profile.
                Note: part of the content may be English and needs to be
                converted to Chinese.
            number_of_text: number of posts by the user in the last 7 days.
            Any missing field falls back to '' (or 0 for number_of_text).

    Returns:
        dict mapping each uid to its label: {uid: label, ...}. Returns an
        empty dict when ``user_data`` is empty or None.
    """
    if not user_data:
        return {}

    user_label = {}
    for uid, info in user_data.items():
        description = info.get('description', '')
        username = info.get('username', '')
        location = info.get('location', '')
        number_of_text = info.get('number_of_text', 0)

        # First pass: classify by the bio string (username + description).
        label = 'other'
        bio_string = username + '_' + description
        if len(bio_string) > 1:
            label = classify_by_biostring(bio_string)

        # Organizations and media outlets are further split into domestic
        # ("inner") and foreign ("outer") when a location is available.
        if label in ('admin', 'media') and location:
            if classify_inner_outer(location) == 1:
                label = 'inner_' + label
            else:
                label = 'outer_' + label

        # Fallback: users without a keyword label are judged by how much
        # they post.
        if label == 'other' and number_of_text >= ACTIVE_COUNT:
            label = 'active'

        user_label[uid] = label

    return user_label
...Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
