Best Python code snippet using molotov_python
Source:usa_senators_reps_twitter_audits.py  
1"""2d:3cd D:\2020\coding\twitter_bot_tweepy4sqlite3 usa_fed_politician_tweets_2.db5dir *.db678#use this to run9d:10cd D:\2020\coding\twitter_bot_tweepy11env_may2022\Scripts\activate.bat12python131415pip install xlrd16https://xlrd.readthedocs.io/en/latest/171819https://ucsd.libguides.com/congress_twitter/home20congress_twitter_040722.xls2122#sql audit queries23select count(*) from user_completed;24select count(*) from err_userid;25select count(*) from users_liking_dict;26select count(*) from err_info_in_tweets;27select count(*) from tweet_dict;28select count(*) from user_completed;29select count(*) from tweet_likes_completed;3031select userID, count(*) as count from completed group by userID;32select userID, count(*) as count from err_info_in_tweets group by userID;33select userID, count(*) as count from err_userid group by userID;34select userID, count(*) as count from completed group by userID;35select userID, tweet_id, count(*) as count from tweet_likes_completed group by userID, tweet_id;3637"""38import urllib.request39import json40import xlrd41import os42import datetime43import time44import tweepy as tw45import pandas as pd4647import sqlite348from sqlite3 import Error49from datetime import datetime5051import requests52#nb: should have these keys loading from environment variables configured outside source code in repo.53#omitting the keys is a temporary fix. 54bearer_token=""55consumer_key=""56consumer_secret=""57access_token=""58access_token_secret=""59#nb: this uses an older method. lazy reuse.60auth = tw.OAuthHandler(consumer_key, consumer_secret)61auth.set_access_token(access_token, access_token_secret)62api = tw.API(auth, wait_on_rate_limit=True)63#64client = tw.Client( bearer_token=bearer_token,65                        consumer_key=consumer_key,66                        consumer_secret=consumer_secret,67                        access_token=access_token,68                        access_token_secret=access_token_secret,69                        return_type = requests.Response,70                        wait_on_rate_limit=True)7172db_filename="usa_fed_politician_tweets_2.db"73conn = sqlite3.connect(db_filename)74757677twitter_names = []7879wb = xlrd.open_workbook('congress_twitter_040722.xls')80sheet_names = wb.sheet_names()81for sheet_name in sheet_names:82    sheet = wb.sheet_by_name(sheet_name)83    print("\n")84    print("Number of Rows: ", sheet.nrows)85    print("Number of Columns: ",sheet.ncols)86    for j in range(2,sheet.nrows):87        twitter_name = sheet.cell(j,4).value88        twitter_name89        if len(twitter_name)>0:90            twitter_names.append(twitter_name)9192num_accounts = len(twitter_names)93print("Number of twitter accounts :", num_accounts)949596def connect(host='http://google.com'):97    try:98        urllib.request.urlopen(host) #Python 3.x99        return True100    except:101        return False102103# test104print( "connected" if connect() else "no internet!" 
)105106107for i in range(13, num_accounts):108    try:109        userID = twitter_names[i]110        print("userID:", userID)111        tweets = api.user_timeline(screen_name=userID,112                                   # 200 is the maximum allowed count113                                   count=200,114                                   include_rts = False,115                                   # Necessary to keep full_text116                                   # otherwise only the first 140 words are extracted117                                   tweet_mode = 'extended'118                                   )119        num_tweets = len(tweets)120        print("# of tweets:", num_tweets)121        #for info in tweets[:1]:122        #for info in tweets:123        for j in range(num_tweets):124            info = tweets[j]125            try:126                #dir(info)127                print("userID:", userID)128                print("ID: {}".format(info.id))129                print("created_at:", info.created_at)130                print("full_text:", info.full_text)131                print("retweet_count:", info.retweet_count)132                print("favorite_count:", info.favorite_count)133                print("\n")134                liking_users = client.get_liking_users(str(info.id))135                record_time = datetime.now()136                users_liking = json.loads(liking_users.content.decode("utf-8") )['data']137                num_liking_tweet = len(users_liking)138                print("num_liking:", num_liking_tweet)139                tweet_dict = {140                    "userID":userID,141                    'tweet_id':str(info.id),142                    "created_at":info.created_at.strftime("%m/%d/%Y, %H:%M:%S"),143                    "record_time":record_time,144                    "full_text":info.full_text,145                    "retweet_count":info.retweet_count,146                    "favorite_count":info.favorite_count,147                    "num_liking_tweet":num_liking_tweet,148                }149                print("tweet_dict:", tweet_dict)150                df_temp = pd.DataFrame(tweet_dict, index=[0])151                df_temp.to_sql("tweet_dict", conn, schema=None, index=False, if_exists='append')152                num_users_linking = len(users_liking)153                #for user_liking in users_liking:154                for k in range(num_users_linking):155                    user_liking = users_liking[k]156                    try:157                        record_time = datetime.now()158                        users_liking_dict = {159                            "tweet_id":user_liking['id'],160                            "name":user_liking['name'],161                            "username":user_liking['username'],162                            "record_time":record_time,163                        }164                        print("users_liking_dict:", users_liking_dict)165                        df_temp = pd.DataFrame(users_liking_dict, index=[0])166                        df_temp.to_sql("users_liking_dict", conn, schema=None, index=False, if_exists='append')167                    except Exception as e:168                        record_time = datetime.now()169                        err_dict = {170                            "userID":userID,171                            "user_liking":user_liking,172                            "error":str(e),173                            "record_time":record_time,174                        }175                        
print("err_user_liking:", err_dict)176                        df_err = pd.DataFrame(err_dict, index=[0])177                        df_err.to_sql("err_user_liking", conn, schema=None, index=False, if_exists='append')178                        if not connect():179                            print("internet not connected.")180                            df_err = pd.DataFrame(err_dict, index=[0])181                            df_err.to_sql("no_internet", conn, schema=None, index=False, if_exists='append')182                            sleep(60)183                #184                record_time = datetime.now()185                completed_dict = {186                    "userID":userID,187                    "tweet_id":str(info.id),188                    "num_likes":num_users_linking,189                    "record_time":record_time,190                }191                print("completed_dict:", completed_dict)192                df_completed = pd.DataFrame(completed_dict, index=[0])193                df_completed.to_sql("tweet_likes_completed", conn, schema=None, index=False, if_exists='append')194            except Exception as e:195                record_time = datetime.now()196                err_dict = {197                    "userID":userID,198                    "info_id":str(info.id),199                    "error":str(e),200                    "record_time":record_time,201                }202                print("err_info_in_tweets:", err_dict)203                df_err = pd.DataFrame(err_dict, index=[0])204                df_err.to_sql("err_info_in_tweets", conn, schema=None, index=False, if_exists='append')205                if not connect():206                    print("internet not connected.")207                    record_time = datetime.now()208                    df_err = pd.DataFrame(err_dict, index=[0])209                    df_err.to_sql("no_internet", conn, schema=None, index=False, if_exists='append')210                    ##move counter back one unit211                    sleep(60)212        record_time = datetime.now()213        completed_dict = {214            "userID":userID,215            "num_tweets":len(tweets),216            "record_time":record_time,217        }218        print("completed_dict:", completed_dict)219        df_completed = pd.DataFrame(completed_dict, index=[0])220        df_completed.to_sql("user_completed", conn, schema=None, index=False, if_exists='append')221    except Exception as e:222        record_time = datetime.now()223        err_dict = {224            "userID":userID,225            "error":str(e),226            "record_time":record_time,227        }228        print("err_userid:", err_dict)229        df_err = pd.DataFrame(err_dict, index=[0])230        df_err = pd.DataFrame(err_dict, index=[0])231        df_err.to_sql("err_userid", conn, schema=None, index=False, if_exists='append')232    #233234completed_dict = {235    "record_time" = datetime.now(),236}237print("completed_dict:", completed_dict)238df_completed = pd.DataFrame(completed_dict, index=[0])
Source:tasks.py  
from E_Energy.celeryapp import app
from .models import CachingData, Data, AdapterParameters, Records, CachingRecord
from datetime import datetime
from django.forms.models import model_to_dict
from django.core.exceptions import ObjectDoesNotExist


def preparation_dict(instance):
    # Rename the model_to_dict() keys to the kwarg names expected by Data.objects.create().
    d = model_to_dict(instance)
    d['record_id'] = d.pop('id_record')
    d['adapter_id'] = d.pop('id_adapter_id')
    return d


def preparation_record_dict(instance):
    d = model_to_dict(instance)
    d['record_id'] = d.pop('id_record')
    d['adapter_id'] = d.pop('id_adapter')
    return d


@app.task(time_limit=115)
def data_caching():
    #try:
    last_record_id = CachingData.objects.latest('record_time').record_id
    print(last_record_id)
    print(CachingData.objects.latest('record_time').record_time)
    last_caching_record = Records.objects.get(id_record=last_record_id)
    print(last_caching_record.record_time)
    #except AttributeError:
    #    last_caching_record = Records(record_time=datetime(2020, 1, 26, 0, 1))
    new_record_instances = Records.objects.filter(
        record_time__gt=last_caching_record.record_time
    ).order_by('record_time')
    for record_instance in new_record_instances.iterator(chunk_size=50):
        new_data_instances = list(
            Data.objects.filter(id_record=record_instance.id_record)
        )
        print(record_instance.id_record)
        print(record_instance.record_time)
        for data_instance in new_data_instances:
            Data.objects.create(
                record_time=record_instance.record_time,
                adapter_id=record_instance.id_adapter.id_adapter,
                **preparation_dict(data_instance)
            )


@app.task(time_limit=115)
def records_caching():
    try:
        last_record_id = CachingRecord.objects.latest('record_time').record_id
        print(last_record_id)
        print(CachingRecord.objects.latest('record_time').record_time)
        last_caching_record = Records.objects.get(id_record=last_record_id)
        print(last_caching_record.record_time)
    except (AttributeError, ObjectDoesNotExist):
        last_caching_record = Records(record_time=datetime(2020, 1, 26, 0, 1))
    # 'вход' / 'выход' are the Russian adapter-name fragments for "input" / "output".
    new_input_records = Records.objects.filter(
        record_time__gt=last_caching_record.record_time,
        id_adapter__adapter_name__icontains='вход'
    ).order_by('record_time')
    for input_record in new_input_records.iterator(chunk_size=200):
        input_datas = list(
            Data.objects.filter(id_record=input_record.id_record)
        )
        input_params = AdapterParameters.objects.filter(
            id_adapter=input_record.id_adapter_id)
        try:
            output_record = Records.objects.get(
                id_adapter__adapter_name__icontains='выход',
                id_adapter__device__in=input_record.id_adapter.device_set.all(),
                record_time=input_record.record_time
            )
            output_datas = list(
                Data.objects.filter(id_record=output_record.id_record)
            )
            output_params = AdapterParameters.objects.filter(
                id_adapter=output_record.id_adapter_id)
        except ObjectDoesNotExist:
            output_record = Records()
            output_datas = []
            output_params = []
        # 'Напряжение фазы N' = "phase N voltage", 'Ток фазы N' = "phase N current".
        p_AU1 = next(
            (data.measure_value for data in input_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in input_params
                        if 'Напряжение фазы 1' in p.parameter_name),
                    0)),
            0)
        p_BU1 = next(
            (data.measure_value for data in input_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in input_params
                        if 'Напряжение фазы 2' in p.parameter_name),
                    0)),
            0)
        p_CU1 = next(
            (data.measure_value for data in input_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in input_params
                        if 'Напряжение фазы 3' in p.parameter_name),
                    0)),
            0)
        p_AI1 = next(
            (data.measure_value for data in input_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in input_params
                        if 'Ток фазы 1' in p.parameter_name),
                    0)),
            0)
        p_BI1 = next(
            (data.measure_value for data in input_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in input_params
                        if 'Ток фазы 2' in p.parameter_name),
                    0)),
            0)
        p_CI1 = next(
            (data.measure_value for data in input_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in input_params
                        if 'Ток фазы 3' in p.parameter_name),
                    0)),
            0)
        p_AU2 = next(
            (data.measure_value for data in output_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in output_params
                        if 'Напряжение фазы 1' in p.parameter_name),
                    0)),
            0)
        p_BU2 = next(
            (data.measure_value for data in output_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in output_params
                        if 'Напряжение фазы 2' in p.parameter_name),
                    0)),
            0)
        p_CU2 = next(
            (data.measure_value for data in output_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in output_params
                        if 'Напряжение фазы 3' in p.parameter_name),
                    0)),
            0)
        p_AI2 = next(
            (data.measure_value for data in output_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in output_params
                        if 'Ток фазы 1' in p.parameter_name),
                    0)),
            0)
        p_BI2 = next(
            (data.measure_value for data in output_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in output_params
                        if 'Ток фазы 2' in p.parameter_name),
                    0)),
            0)
        p_CI2 = next(
            (data.measure_value for data in output_datas
                if data.id_parameter == next(
                    (p.id_parameter for p in output_params
                        if 'Ток фазы 3' in p.parameter_name),
                    0)),
            0)
        total_power = (p_AU1*p_AI1 + p_BU1*p_BI1 + p_CU1*p_CI1)/60
        x1 = p_AI1*p_AU2/60
        x2 = p_AI2*p_AU1/60
        x3 = p_BI1*p_BU2/60
        x4 = p_BI2*p_BU1/60
        x5 = p_CI1*p_CU2/60
        x6 = p_CI2*p_CU1/60
        x0 = x1 + x3 + x5
        x8 = x2 + x4 + x6
        xh = x0/x8*100 if x8 != 0 else 0
        xp = 100 - xh
        CachingRecord.objects.create(
            p_AU1=p_AU1,
            p_BU1=p_BU1,
            p_CU1=p_CU1,
            p_AI1=p_AI1,
            p_BI1=p_BI1,
            p_CI1=p_CI1,
            p_AU2=p_AU2,
            p_BU2=p_BU2,
            p_CU2=p_CU2,
            p_AI2=p_AI2,
            p_BI2=p_BI2,
            p_CI2=p_CI2,
            total_power=total_power,
            x1=x1,
            x2=x2,
            x3=x3,
            x4=x4,
            x5=x5,
            x6=x6,
            x0=x0,
            x8=x8,
            xh=xh,
            xp=xp,
            **preparation_record_dict(input_record)
        )
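The twelve p_* lookups above differ only in which data/parameter pair they scan and which parameter-name fragment they match. A small helper along the following lines (the name measure_for is an illustration, not part of the original module) would keep the same fall-back-to-zero behaviour while removing the duplication:

def measure_for(datas, params, name_fragment):
    # Find the parameter whose name contains the fragment, then return the matching
    # measured value; fall back to 0 exactly like the original nested next(..., 0) calls.
    param_id = next((p.id_parameter for p in params if name_fragment in p.parameter_name), 0)
    return next((d.measure_value for d in datas if d.id_parameter == param_id), 0)

# e.g. p_AU1 = measure_for(input_datas, input_params, 'Напряжение фазы 1')
#      p_AI2 = measure_for(output_datas, output_params, 'Ток фазы 1')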
Source:weight_record_features.py  
'''
user_id,weight,record_on,date
1,56.000,2013-02-28,2013-02-28
1,84.000,2013-03-17,2013-03-17
1,63.900,2013-03-18,2013-03-18
1,70.200,2013-10-01,2013-10-01
1,66.000,2013-10-31,2013-10-31
1,62.000,2014-01-11,2014-01-11
2,61.500,2013-06-16,2013-06-16
2,62.000,2013-06-17,2013-06-17
2,62.000,2013-07-07,2013-07-07
2,62.000,2013-08-04,2013-08-04
2,61.600,2013-08-26,2013-08-31
'''
from datetime import datetime
import numpy as np
import pickle
import pandas as pd
import os


def weight_change_record_frequency_analysis(record_file, output_dir):
    record_frequency = dict()
    with open(record_file, 'r') as recf:
        for line in recf:
            line = line.rstrip("\n").split(",")
            if line[0] != "user_id":
                usr = line[0]
                record_time_ = line[2].split("-")
                try:
                    record_time = datetime(int(record_time_[0]), int(record_time_[1]), int(record_time_[2]))
                except:
                    continue
                if usr not in record_frequency:  # initialize user data
                    record_frequency[usr] = [1, [record_time, record_time]]  # number of records, time intervals (earliest, latest)
                else:  # update user data
                    if record_time < datetime(2008, 1, 1):  # BOOHEE was founded in 2008; any records before this date should be dropped
                        continue
                    if record_time > datetime(2015, 12, 31):
                        continue
                    if (record_time - record_frequency[usr][-1][0]).days < -30:
                        continue
                    record_frequency[usr][0] += 1
                    if (record_time - record_frequency[usr][-1][0]).days <= 0:  # update earliest
                        record_frequency[usr][-1][0] = record_time
                    if record_time >= record_frequency[usr][-1][1]:  # update latest
                        if (record_time - record_frequency[usr][-1][1]).days < 30:
                            record_frequency[usr][-1][1] = record_time
                        else:
                            record_frequency[usr].append([record_time, record_time])
    for key, value in record_frequency.items():
        interval_num = len(value) - 1
        time_span = 0
        for intv in value[1:]:
            time_span += ((intv[1] - intv[0]).days + 1)
        record_frequency[key] = [value[0], time_span, interval_num]
        #if value[0]/time_span > 200:
        #    print(key)
    with open(output_dir + "/weight_change_record_frequency.pkl", 'wb') as outf:
        pickle.dump(record_frequency, outf)


def plot_frequency_distribution(record_pkl_file):
    import matplotlib.pyplot as plt
    with open(record_pkl_file, 'rb') as infile:
        frequency_record = pickle.load(infile)
    record_number = list()
    record_time_span = list()
    record_interval = list()
    for value in frequency_record.values():
        record_number.append(value[0])
        record_time_span.append(value[1])
        record_interval.append(value[2])
    record_number = np.array(record_number)
    record_interval = np.array(record_interval)
    record_time_span = np.array(record_time_span).astype(float)
    record_density = record_number/record_time_span
    fig, axes = plt.subplots(2, 2, figsize=(12, 12))
    fig.suptitle("User weight record distribution analysis")
    bins1 = np.array([0, ] + list(np.logspace(0, np.log10(2000))))
    axes[0][0].hist(record_number, histtype="stepfilled", alpha=0.6, log=True, bins=bins1)
    axes[0][0].set_title("record number distribution (number of records)")
    axes[0][0].set_xscale('log', basex=10)
    bins2 = np.array([0, ] + list(np.logspace(0, np.log10(3000))))
    axes[0][1].hist(record_time_span, histtype="stepfilled", alpha=0.6, log=True, bins=bins2)
    axes[0][1].set_title("record time span (only counting time within intervals) distribution (days)")
    axes[0][1].set_xscale('log', basex=10)
    axes[1][0].hist(record_density, histtype="stepfilled", alpha=0.6, log=True, bins=50)
    axes[1][0].set_title("record density distribution (within intervals) (records/day)")
    axes[1][1].hist(record_interval, histtype="stepfilled", alpha=0.6, log=True, bins=np.arange(0, 35, 1))
    axes[1][1].set_title("record interval distribution (number of intervals)")
    plt.savefig('record_frequency_distribution', bbox_inches='tight')
    plt.close()


def get_weight_record_features(record_file, output_dir, user_lists='all'):
    """
    :return: N(nodes) by 4 matrix of weight record features of selected users
    """
    record_frequency = dict()
    out_dict = dict()  # userID: [number of records, span, density, number of intervals]
    with open(record_file, 'r') as recf:
        for line in recf:
            line = line.rstrip("\n").split(",")
            if line[0] != "user_id":
                usr = line[0]
                record_time_ = line[2].split("-")
                try:
                    record_time = datetime(int(record_time_[0]), int(record_time_[1]), int(record_time_[2]))
                except:
                    continue
                if user_lists != 'all' and usr not in user_lists:
                    continue
                else:
                    if usr not in record_frequency:  # initialize user data
                        record_frequency[usr] = [1, [record_time, record_time]]  # number of records, time intervals (earliest, latest)
                    else:  # update user data
                        if record_time < datetime(2008, 1, 1):  # BOOHEE was founded in 2008; any records before this date should be dropped
                            continue
                        if record_time > datetime(2015, 12, 31):
                            continue
                        if (record_time - record_frequency[usr][-1][0]).days < -30:
                            continue
                        record_frequency[usr][0] += 1
                        if (record_time - record_frequency[usr][-1][0]).days <= 0:  # update earliest
                            record_frequency[usr][-1][0] = record_time
                        if record_time >= record_frequency[usr][-1][1]:  # update latest
                            if (record_time - record_frequency[usr][-1][1]).days < 30:
                                record_frequency[usr][-1][1] = record_time
                            else:
                                record_frequency[usr].append([record_time, record_time])
    for key, value in record_frequency.items():
        interval_num = len(value) - 1
        time_span = 0
        for intv in value[1:]:
            time_span += ((intv[1] - intv[0]).days + 1)
        record_frequency[key] = [value[0], time_span, interval_num, float(value[0])/time_span]
    df = pd.DataFrame.from_dict(record_frequency)
    df.to_csv(os.path.join(output_dir, 'weight_record_features.csv'), index=False)


if __name__ == '__main__':
    # weight_change_record_frequency_analysis('../data/WeightLoss/weight_record_consolidated.csv', '../data/WeightLoss/clean/')
    # plot_frequency_distribution('../data/WeightLoss/clean/weight_change_record_frequency.pkl')
    pass  # added so the block stays valid Python while the example calls above remain commented out
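The __main__ block only shows commented-out calls, so a short usage sketch may help. It assumes a CSV laid out like the sample rows in the module docstring; the file and directory names below are placeholders, not paths from the original project:

# Hypothetical paths; substitute the real consolidated record file and an existing output directory.
records_csv = 'weight_record_consolidated.csv'
out_dir = 'clean'

weight_change_record_frequency_analysis(records_csv, out_dir)   # writes weight_change_record_frequency.pkl
plot_frequency_distribution(out_dir + '/weight_change_record_frequency.pkl')   # saves the 2x2 histogram figure
get_weight_record_features(records_csv, out_dir)   # writes weight_record_features.csv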
