Best Python code snippet using radish
1_extractingrequiredsignalsandtimeoverlapwithicustay.py
Source: 1_ExtractingRequiredSignalsandTimeOverlapWithICUStay.py
# -*- coding: utf-8 -*-
"""1_ExtractingRequiredSignalsandTimeOverlapWithICUStay.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1WlVZsHEAe1H4sLDaFeTUTI1lwymSx_X2
"""

# This notebook does the following:
# 1. Extract the SUBJECT_IDs for whom a record exists in the MIMIC matched
#    waveform dataset (total number 4,653) from a 'df_waveform_exists.csv'
#    CSV file into a dataframe.
# 2. Extract all the unique signals that are recorded for the above patients
#    and generate a 'physio_signals.txt' file.
# 3. Check for which SUBJECT_IDs all relevant signals exist, i.e. HR, SPO2%,
#    ABP SYS, ABP DIAS, ABP MEAN, and RESP.  If all signals exist, insert '1'
#    in the 'sig_exists' column for that particular patient in the dataframe.
# 4. Considering the SUBJECT_IDs for which all relevant signals exist, check
#    if the recordings overlap with their ICU stays, i.e. whether the record
#    start/end datetimes intersect the ICU intime/outtime.  If they overlap,
#    insert '1' in the 'timeoverlap' column for that patient.
# 5. Generate a final CSV 'df_TS_exists_withoutTEMP_overlapcount.csv'.
# 6. Read the sample records (one sampled once per second, one once per
#    minute), keep only the HR, SPO2%, ABP SYS, ABP DIAS, ABP MEAN & RESP
#    signals, and convert once-per-second data to once-per-minute by
#    averaging each 60-second window.

# Commented out IPython magic to ensure Python compatibility.
# FIX: shell magic is not valid Python in a .py export; run it in the
# notebook (or `pip install wfdb` manually) instead.
# !pip install wfdb
import io
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import os
import shutil
import posixpath
import wfdb
import urllib.request
import datetime
from google.colab import files

# Upload the CSV produced by the previous step; one row per (subject,
# ICU stay), with 'subject_id', 'intime' and 'outtime' columns (see usage
# below).
uploaded = files.upload()
df_csvdata = pd.read_csv(io.BytesIO(uploaded['df_waveform_exists.csv']))
# Dataset is now stored in a Pandas Dataframe
print(df_csvdata.shape)
df_csvdata['sig_exists'] = ''
df_csvdata['timeoverlap'] = ''
df_csvdata['Number_of_overlaping_records'] = ''
print(df_csvdata.shape)

# To get a list of all the unique signals present in PHYSIOBANK
from collections import namedtuple

# FIX: accumulate over ALL patients.  The original re-initialised this list
# inside the per-patient loop, so 'physio_signals.txt' only ever contained
# the signals of the last patient processed — contradicting step 2 above.
signals_in_physiobank = []
for index, row in df_csvdata.iterrows():
    # Matched records live under mimic3wdb/matched/pXX/pXXXXXX/ where XXXXXX
    # is the zero-padded subject_id; the RECORDS file lists every record name.
    wdb_dir_path = 'mimic3wdb/matched/p' + str(row['subject_id']).zfill(6)[:2] + '/p' + str(row['subject_id']).zfill(6) + '/'
    wdb_path_toAllRecords = 'https://archive.physionet.org/physiobank/database/' + wdb_dir_path + 'RECORDS'
    wdb_records = urllib.request.urlopen(wdb_path_toAllRecords)
    for line in wdb_records.readlines():
        record = line.decode("utf-8").rstrip()
        # Only the numerics records (names ending in 'n') carry the
        # aggregated vital-sign trends this notebook needs.
        if record[-1:] == 'n':
            try:
                print(row['subject_id'])
                # FIX: the keyword is pn_dir (as used everywhere else in this
                # notebook); pb_dir was the obsolete physiobank argument.
                signals, fields = wfdb.rdsamp(record, pn_dir=wdb_dir_path)
                for sig_name in fields['sig_name']:
                    if sig_name not in signals_in_physiobank:
                        signals_in_physiobank.append(sig_name)
            except ValueError:
                print('Error occured while reading waveform: ', record)

print(len(signals_in_physiobank))
with open("physio_signals.txt", "w") as output:
    output.write(str(signals_in_physiobank))

files.download('physio_signals.txt')
#### IMPORTANT ! DO NOT DELETE
# To get patients for whom the required signals exist and there are TS
# records for the ICU stay in consideration.
from collections import namedtuple

for index, row in df_csvdata.iterrows():
    records_toRead = []
    wdb_dir_path = 'mimic3wdb/matched/p' + str(row['subject_id']).zfill(6)[:2] + '/p' + str(row['subject_id']).zfill(6) + '/'
    wdb_path_toAllRecords = 'https://archive.physionet.org/physiobank/database/' + wdb_dir_path + 'RECORDS'
    wdb_records = urllib.request.urlopen(wdb_path_toAllRecords)
    count_overlaping_records = 0
    for line in wdb_records.readlines():
        record = line.decode("utf-8").rstrip()
        # Only numerics records (trailing 'n') hold the aggregated vitals.
        if record[-1:] == 'n':
            try:
                signals, fields = wfdb.rdsamp(record, pn_dir=wdb_dir_path)
                # Normalise labels ('ABP Sys' -> 'ABPSYS') before matching;
                # the SpO2 channel is labelled either 'SPO2' or '%SPO2'.
                list_sig_name = [item.upper().replace(' ', '') for item in fields['sig_name']]
                sig_exist_1 = all(x in list_sig_name for x in ['HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP'])
                sig_exist_2 = all(x in list_sig_name for x in ['HR', '%SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP'])
                if sig_exist_1 or sig_exist_2:
                    df_csvdata.loc[index, 'sig_exists'] = 1
                    record_starttime = datetime.datetime.combine(fields['base_date'], fields['base_time'])
                    # fs == 1.000 Hz -> one sample per second;
                    # fs == 0.017 Hz (1/60) -> one sample per minute.
                    if '%.3f' % (fields['fs']) == '1.000':
                        record_endtime = record_starttime + datetime.timedelta(seconds=(fields['sig_len'] - 1))
                    elif '%.3f' % (fields['fs']) == '0.017':
                        record_endtime = record_starttime + datetime.timedelta(minutes=(fields['sig_len'] - 1))
                    else:
                        print('ERROR IN SAMPLING')
                        print(record)
                        print(wdb_dir_path)
                    # Calculate if we have a recording for the time of the ICU stay.
                    Range = namedtuple('Range', ['start', 'end'])
                    r1 = Range(start=datetime.datetime.strptime(row['intime'], '%Y-%m-%d %H:%M:%S'),
                               end=datetime.datetime.strptime(row['outtime'], '%Y-%m-%d %H:%M:%S'))
                    r2 = Range(start=record_starttime, end=record_endtime)
                    latest_start = max(r1.start, r2.start)
                    earliest_end = min(r1.end, r2.end)
                    # FIX: two intervals overlap iff latest_start <= earliest_end.
                    # The original computed (earliest_end - latest_start).days + 1
                    # and tested >= 0; timedelta.days floors towards minus
                    # infinity, so gaps of up to 24 hours were falsely counted
                    # as overlap.
                    if latest_start <= earliest_end:
                        print('RECORD EXISTS FOR THE ICU STAYS WITH THE SIGNALS NEEDED : ', row['subject_id'])
                        df_csvdata.loc[index, 'timeoverlap'] = 1
                        count_overlaping_records = count_overlaping_records + 1
                        # todo: adding new dataframe, extracting required signals,
                        # computing average per-minute values in case of
                        # per-second sampling frequency
                    else:
                        print('RECORD DOES NOT EXISTS FOR THE ICU STAYS WITH THE SIGNALS NEEDED : ', row['subject_id'])
                else:
                    print('DO NOT SELECT THIS RECORD', row['subject_id'])
            except ValueError:
                print('Error occured while reading waveform: ', record)
    df_csvdata.loc[index, 'Number_of_overlaping_records'] = count_overlaping_records

# check dataframe
# (dead commented-out "testing date part" duplicate removed here)
print(df_csvdata[(df_csvdata['sig_exists'] == 1)].shape)
print(df_csvdata[(df_csvdata['timeoverlap'] == 1)].shape)
df_csvdata.to_csv('df_TS_exists_withoutTEMP_overlapcount.csv', sep=',', index=False, header=True)
from google.colab import files
files.download('df_TS_exists_withoutTEMP_overlapcount.csv')

# Commented out IPython magic to ensure Python compatibility.
# example to send to Marcela
# FIX: shell magic is not valid Python in a .py export.
# !pip install wfdb
import io
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import os
import shutil
import posixpath
import wfdb
import urllib.request
import datetime

# Candidate channel labels observed in the matched waveform records:
#   'HR', 'PULSE', 'ABP SYS', 'ABP DIAS'
channels = ['ABP DIAS', 'ABP SYS', 'PULSE', 'temp', 'HR']


def _fill_signal_columns(df, signals, fields, include_temp=False):
    """Copy the required channels out of a wfdb signal matrix into df.

    Labels are matched case-insensitively with blanks removed ('ABP Sys' ==
    'ABPSYS'); the SpO2 channel may be labelled 'SPO2' or '%SPO2'.  Channels
    absent from the record leave their column untouched.  Extracted from the
    if/elif chain that was duplicated verbatim in both sample cells below.
    """
    for name in fields['sig_name']:
        key = name.upper().replace(' ', '')
        col_idx = fields['sig_name'].index(name)
        if key == 'HR':
            df['HR'] = signals[:, col_idx]
        elif key in ('SPO2', '%SPO2'):
            df['SPO2'] = signals[:, col_idx]
        elif key == 'ABPSYS':
            df['ABPSYS'] = signals[:, col_idx]
        elif key == 'ABPDIAS':
            df['ABPDIAS'] = signals[:, col_idx]
        elif key == 'ABPMEAN':
            df['ABPMEAN'] = signals[:, col_idx]
        elif key == 'RESP':
            df['RESP'] = signals[:, col_idx]
        elif include_temp and key == 'TEMP':
            df['TEMP'] = signals[:, col_idx]


def _record_end_time(record_starttime, fields):
    """Return the timestamp of the record's last sample.

    fs == 1.000 Hz means one sample per second; fs == 0.017 Hz (1/60) one
    sample per minute.  Returns None (after printing an error) for any other
    sampling frequency.  Extracted from the triplicated if/elif below.
    """
    if '%.3f' % (fields['fs']) == '1.000':
        print('Sampled once per second')
        return record_starttime + datetime.timedelta(seconds=(fields['sig_len'] - 1))
    elif '%.3f' % (fields['fs']) == '0.017':
        print('Sampled once per minute')
        return record_starttime + datetime.timedelta(minutes=(fields['sig_len'] - 1))
    print('ERROR IN SAMPLING')
    return None


# --- Example: read a small window of a numerics record, selected channels only.
signals, fields = wfdb.rdsamp('p042930-2190-07-28-20-30n', pn_dir='mimic3wdb/matched/p04/p042930/', channel_names=['HR', 'ABP MEAN', 'ABP SYS', 'ABP DIAS'], sampfrom=100, sampto=120)
print('                                           ')
print('                                           ')
print('                                           ')
wfdb.plot_items(signal=signals, fs=fields['fs'])
print('Printing signals')
display(signals)
print('Printing fields')
display(fields)
print('------------------------------')
print('fs', fields['fs'])
print('signal length', fields['sig_len'])
print('date', fields['base_date'])
print('time', fields['base_time'])
record_starttime = datetime.datetime.combine(fields['base_date'], fields['base_time'])
print('%.3f' % (fields['fs']))
record_endtime = _record_end_time(record_starttime, fields)

# Commented out IPython magic to ensure Python compatibility.
#### To show a sample of a person suffering from sepsis with high values for
#### ABP Systolic (90 and less than 120) and low values for ABP diastolic
#### (60 and less than 80)
# SAMPLE FREQUENCY: ONCE PER SECOND
# !pip install wfdb
import io
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import os
import shutil
import posixpath
import wfdb
import urllib.request
import datetime

signals, fields = wfdb.rdsamp('p030582-2129-04-07-17-23n', pn_dir='mimic3wdb/matched/p03/p030582/')  # sampfrom=150, sampto=200
wfdb.plot_items(signal=signals, fs=fields['fs'])
display(signals)
display(fields)
df_ts_records_columns = ['RECORD', 'TIME', 'HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP']
df_ts_records = pd.DataFrame(columns=df_ts_records_columns)
_fill_signal_columns(df_ts_records, signals, fields)
record_starttime = datetime.datetime.combine(fields['base_date'], fields['base_time'])
record_endtime = _record_end_time(record_starttime, fields)
print('start time: ', record_starttime)
print('end time: ', record_endtime)

# NOTE(review): a large commented-out block followed here in the original
# ("START COMMENT" .. "END COMMENT"): it stamped TIME/RECORD columns and
# aggregated the per-second data into per-minute averages by taking the mean
# of each 60-row window, plus a small commented-out toy example of the same
# aggregation on a hand-built dict dataframe.  Removed as dead code.

#### To show a sample of person NOT suffereing from sepsis
##############################
################################
# SAMPLE FREQUENCY: ONCE PER MINUTE
df_ts_records_columns = ['RECORD', 'TIME', 'HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP', 'TEMP']
sig_list_1 = ['HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP', 'TEMP']
sig_list_2 = ['HR', '%SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP', 'TEMP']
df_ts_records = pd.DataFrame(columns=df_ts_records_columns)
signals, fields = wfdb.rdsamp('p059864-2173-05-16-11-56n', pn_dir='mimic3wdb/matched/p05/p059864/')
# signals, fields = wfdb.rdsamp('p042930-2190-07-28-20-30n', pn_dir='mimic3wdb/matched/p04/p042930/', sampfrom=0, sampto=10)
wfdb.plot_items(signal=signals, fs=fields['fs'])
display(signals)
print(signals.shape)
display(fields)
_fill_signal_columns(df_ts_records, signals, fields, include_temp=True)
print(df_ts_records)
record_starttime = datetime.datetime.combine(fields['base_date'], fields['base_time'])
record_endtime = _record_end_time(record_starttime, fields)
437print ('start time: ', record_starttime);438print ('end time: ', record_endtime);439df_ts_records['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min'); 440df_ts_records.TIME = pd.to_datetime(df_ts_records.TIME)441df_ts_records['RECORD'] = 'p042930-2190-07-28-20-30n';442#print('--------#####-----')443#print(df_ts_records[ (df_ts_records['TIME'] >= '2151-10-06 05:25:35') & (df_ts_records['TIME'] <= '2151-10-06 05:29:35')])444#print('dropping all null')445#print(df_ts_records.dropna())446# Commented out IPython magic to ensure Python compatibility.447#### IMPORTANT ! DO NOT DELETE448#To get patient records if multiple ts existis 449!pip install wfdb450import io451import pandas as pd452from IPython.display import display453import matplotlib.pyplot as plt454# %matplotlib inline455import numpy as np456import os457import shutil458import posixpath459import wfdb460import urllib.request461import datetime462from collections import namedtuple463df_ts_records_columns = ['RECORD','TIME','HR', 'SPO2','ABPSYS','ABPDIAS','ABPMEAN','RESP'] 464df_ts_records = pd.DataFrame(columns=df_ts_records_columns); 465#subject_id= 48149; # per second multiple 466#icu_intime = datetime.datetime(2127, 5, 25, 8, 34,39) # for 48149467#icu_outtime = datetime.datetime(2127, 6, 16, 1, 15,22) # for 4814946866965	23/01/2198 12:41	28/01/2198 19:16	22/01/2198 22:46	23/01/2198 13:46469subject_id= 55638; # per minute 470icu_intime = datetime.datetime(2106, 11, 25, 12, 37,32) # for 55638471icu_outtime = datetime.datetime(2106, 11, 27, 10, 49,33) # for 55638472print ('icu intime =', icu_intime)473print ('icu outtime', icu_outtime)474"""475subject_id= 59864;476icu_intime = datetime.datetime(2173, 5, 16, 12, 14,45)477print ('icu intime =', icu_intime)478icu_outtime = datetime.datetime(2173, 6, 8, 15, 45,23)479print ('icu outtime', icu_outtime)480#2173-05-16 12:14:45,2173-06-08 15:45:23,481"""482wdb_dir_path = 'mimic3wdb/matched/p'+ str(subject_id).zfill(6)[:2] + '/p' + 
str(subject_id).zfill(6) + '/';483wdb_path_toAllRecords = 'https://archive.physionet.org/physiobank/database/'+ wdb_dir_path + 'RECORDS';484wdb_records =  urllib.request.urlopen(wdb_path_toAllRecords);  485try:486  df_ts_records.drop(df_ts_records.index, inplace=True)487except:488  print('df_ts_records does not exist')489count_overlap = 0; 490for lines in wdb_records.readlines():491    record = lines.decode("utf-8"); 492    record = str(record).rstrip()493    #print (record[-1:])494    if record[-1:] == 'n':495      print(record);496      #print (wdb_dir_path);497      record = str(record).rstrip()498      499      500      #try:501      signals =''502      fields = ''503      signals,fields = wfdb.rdsamp(record, pn_dir=wdb_dir_path) ; 504        505      list_sig_name = [item.upper().replace(' ','') for item in fields['sig_name']]506      sig_exist_1 = all(x in list_sig_name for x in ['HR', 'SPO2','ABPSYS','ABPDIAS','ABPMEAN','RESP']);  #%SpO2507      sig_exist_2 = all(x in list_sig_name for x in ['HR', '%SPO2','ABPSYS','ABPDIAS','ABPMEAN','RESP']); 508      record_starttime = datetime.datetime.combine(fields['base_date'] ,fields['base_time'] ) ;509            510      if  '%.3f'%(fields['fs']) == '1.000' :511        record_endtime = record_starttime + datetime.timedelta(seconds= (fields['sig_len']-1)) ;512      elif '%.3f'%(fields['fs'])== '0.017' :513        record_endtime = record_starttime + datetime.timedelta(minutes = (fields['sig_len']-1)) ;514      else : 515        print('ERROR IN SAMPLING');516        print(record);517        print(wdb_dir_path);518      print('record START time:  ', record_starttime)519      print('record END time:  ', record_endtime)520      Range = namedtuple('Range', ['start', 'end'])521      r1 = Range(start= icu_intime, end= icu_outtime)522      r2 = Range(start= record_starttime, end = record_endtime)523      latest_start = max(r1.start, r2.start)524      earliest_end = min(r1.end, r2.end)525      delta = (earliest_end - 
latest_start).days + 1526       #delta >= 0 :527      print('sig_exist_1 : ', sig_exist_1)528      print('sig_exist_2 : ', sig_exist_2)529      print('delta : ', delta)530      if ( ((sig_exist_1 == True) or (sig_exist_2 == True)) and (delta >= 0)):531        ###532        try:533          df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True)534        except:535          print('individual record for a single patient df does not exists')536          537        df_ts_indv_record_temp = pd.DataFrame(columns = df_ts_records_columns ) # individual record for a single patient #safiya538        ###539        df_row_idx = df_ts_records.shape[0] ;540        print('length of signal: ', len(signals))541        print('index of dataframe before inserting into it: ', df_row_idx)542         543        for i in fields['sig_name']:544          545          if i.upper().replace(' ','') == 'HR':546            idx_HR = fields['sig_name'].index(i);547          elif (( i.upper().replace(' ','') == 'SPO2') or (i.upper().replace(' ','') =='%SPO2')):548            idx_SPO2 = fields['sig_name'].index(i);549          elif i.upper().replace(' ','') == 'ABPSYS' :550            idx_ABPSYS = fields['sig_name'].index(i);551          elif i.upper().replace(' ','') == 'ABPDIAS' :552            idx_ABPDIAS = fields['sig_name'].index(i);553          elif i.upper().replace(' ','') == 'ABPMEAN' :554            idx_ABPMEAN = fields['sig_name'].index(i);555          elif i.upper().replace(' ','') == 'RESP' :556            idx_RESP = fields['sig_name'].index(i);557            558        559        560        if count_overlap == 0 : 561            if record_starttime > icu_intime:562              print('inserting nulls before the record start time')563              #print( (datetime.datetime.strptime((icu_intime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))  ) #+ datetime.timedelta(seconds= int(record_starttime.strftime('%S')))  )564              #print(icu_intime.strftime('%Y-%m-%d 
%H:%M'))565              if '%.3f'%(fields['fs'])== '0.017' :566                minutes_to_insert_start = (datetime.datetime.strptime((record_starttime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))- (datetime.datetime.strptime((icu_intime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))567              elif '%.3f'%(fields['fs'])==  '1.000' :568                minutes_to_insert_start = record_starttime - icu_intime569              print('minutes_to_insert_start:  ', minutes_to_insert_start)570              duration_in_s = minutes_to_insert_start.total_seconds()571              minutes_to_insert_start = divmod(duration_in_s, 60)[0] - 1 572              try:573                df_ts_records_time_temp_start.drop(df_ts_records_time_temp_start.index,  inplace=True)574              except :575                print( 'df_ts_records_time_temp_start does not exist')576              577              df_ts_records_time_temp_start = pd.DataFrame(columns=df_ts_records_columns)578              if '%.3f'%(fields['fs'])== '0.017' :579                df_ts_records_time_temp_start['TIME'] = pd.date_range(icu_intime + datetime.timedelta(minutes=1), 580                                                              periods=minutes_to_insert_start, freq='1min'); 581              elif '%.3f'%(fields['fs'])== '1.000' :582                df_ts_records_time_temp_start['TIME'] = pd.date_range(icu_intime + datetime.timedelta(seconds=1), 583                                                              periods= (duration_in_s-1), freq='S'); 584              print ('INSERTING ONLY NULL IN START:')585              print (df_ts_records_time_temp_start)586              df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_time_temp_start, ignore_index=True);587              print('inserting nulls in start IN INDV LEVEL')588              print(df_ts_indv_record_temp)589            try:590              df_ts_records_temp.drop(df_ts_records_temp.index,  inplace=True)591            
except:592              print( 'df_ts_records_temp does not exist')593            df_ts_records_temp = pd.DataFrame(columns=df_ts_records_columns)594            df_ts_records_temp['HR']= signals[:,idx_HR ] 595            df_ts_records_temp['SPO2']= signals[:,idx_SPO2 ] 596            df_ts_records_temp['ABPSYS']= signals[:,idx_ABPSYS ] 597            df_ts_records_temp['ABPDIAS']= signals[:,idx_ABPDIAS ] 598            df_ts_records_temp['ABPMEAN']= signals[:,idx_ABPMEAN ] 599            df_ts_records_temp['RESP']= signals[:,idx_RESP ] 600            if '%.3f'%(fields['fs'])== '0.017' :601              df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min'); 602            elif '%.3f'%(fields['fs'])== '1.000' :603              df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='S'); 604            df_ts_records_temp.TIME = pd.to_datetime(df_ts_records_temp.TIME)605            df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_temp, ignore_index=True); #safiya606            print('inserting nulls in start + first record data')607            print(df_ts_indv_record_temp)608            if '%.3f'%(fields['fs'])== '1.000' : #safiya609              print("AGGREGATING")610              start_idx = 0;611              df_ts_records_new = pd.DataFrame(columns=df_ts_records_columns);612              #print('length of new df  '  , df_ts_records_new.shape[0] )613              for index, rows in df_ts_indv_record_temp.iterrows():614                print('start index for first: ', start_idx)615                if start_idx >= df_ts_indv_record_temp.shape[0]:616                  exit;617                else: 618                  619                  #print(df_ts_records.iloc[start_idx: (start_idx+60), 2:8])620                  array = np.array( df_ts_indv_record_temp.iloc[start_idx: (start_idx+60), 2:8].mean(axis=0))621                  #print('printing array of average')622     
             #print (array)623                  current_index = df_ts_records_new.shape[0]624                  df_ts_records_new.loc[current_index ,'HR']= array[0]625                  df_ts_records_new.loc[current_index,'SPO2']= array[1]626                  df_ts_records_new.loc[current_index,'ABPSYS']= array[2]627                  df_ts_records_new.loc[current_index,'ABPDIAS']= array[3]628                  df_ts_records_new.loc[current_index,'ABPMEAN']= array[4]629                  df_ts_records_new.loc[current_index,'RESP']= array[5]630                  #print(df_ts_records_new)631                  #print('next average')632                  start_idx = start_idx+60;633                  #print('start index :: ' , start_idx)634              print('# record time:  ',df_ts_records_new.shape[0])635              df_ts_records_new['TIME'] = pd.date_range(df_ts_indv_record_temp.loc[0,'TIME'], periods= df_ts_records_new.shape[0], freq='1min'); 636              df_ts_records_new.TIME = pd.to_datetime(df_ts_records_new.TIME)637              #print(df_ts_records_new)638              df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True);639              #df_ts_records = pd.DataFrame(columns=df_ts_records_columns)640              df_ts_records = df_ts_records.append(df_ts_records_new, ignore_index=True);641              print('only first record  aggregated at individual record level: ')642              print(df_ts_records_new)643              print('inserting aggregated first record into  FINAL SUBJEC DATAFRAME')644              print(df_ts_records)645              df_ts_records_new.drop(df_ts_records_new.index, inplace=True)646              df_ts_records['RECORD'] = record   647            else:648              df_ts_records = df_ts_records.append(df_ts_indv_record_temp, ignore_index=True);649              df_ts_records['RECORD'] = record   650              print('inserting nulls in start + first record data into FINAL SUBJEC DATAFRAME')651              
print(df_ts_records)652           653               654        else:655            if record_starttime <= icu_outtime :656              last_Record_time = df_ts_records.loc[(df_row_idx-1),'TIME']657              print('main DF last time record: ',last_Record_time )658              if '%.3f'%(fields['fs'])== '0.017' :659                minutes_to_insert = (datetime.datetime.strptime((record_starttime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M')) - (datetime.datetime.strptime((last_Record_time.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))660              elif '%.3f'%(fields['fs'])== '1.000' :661                minutes_to_insert = record_starttime - last_Record_time662              duration_in_s = minutes_to_insert.total_seconds()663              minutes_to_insert = divmod(duration_in_s, 60)[0] - 1664              print ('minutes_to_insert:  ', minutes_to_insert);665              print('seconds to insert: ', duration_in_s)666              try:667                df_ts_records_time_temp.drop(df_ts_records_time_temp.index, inplace=True);668                df_ts_records_temp.drop(df_ts_records_temp.index, inplace=True);669              except:670                print ('df_ts_records_temp and df_ts_records_time_temp does not exits')671              df_ts_records_time_temp = pd.DataFrame(columns=df_ts_records_columns)672              if '%.3f'%(fields['fs'])== '0.017' :673                df_ts_records_time_temp['TIME'] = pd.date_range(last_Record_time + datetime.timedelta(minutes=1), 674                                                              periods=minutes_to_insert, freq='1min'); 675              elif  '%.3f'%(fields['fs'])== '1.000' :676                print('last record time' , last_Record_time)677                print('(duration_in_s-1)' , (duration_in_s-1))678                df_ts_records_time_temp['TIME'] = pd.date_range(last_Record_time + datetime.timedelta(seconds=1), 679                                                              
periods=(duration_in_s-1), freq='S'); 680              print ('INSERTING ONLY NULL UNTILL NEXT RECORD START TIME:')681              print (df_ts_records_time_temp)682              df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_time_temp, ignore_index=True);683              print('inserting nulls UNTILL NEXT RECORD START TIME INTO INDV LEVEL')684              print(df_ts_indv_record_temp)685              df_ts_records_temp = pd.DataFrame(columns=df_ts_records_columns)686          687              df_ts_records_temp['HR']= signals[:,idx_HR ] 688              df_ts_records_temp['SPO2']= signals[:,idx_SPO2 ] 689              df_ts_records_temp['ABPSYS']= signals[:,idx_ABPSYS ] 690              df_ts_records_temp['ABPDIAS']= signals[:,idx_ABPDIAS ] 691              df_ts_records_temp['ABPMEAN']= signals[:,idx_ABPMEAN ] 692              df_ts_records_temp['RESP']= signals[:,idx_RESP ] 693              if '%.3f'%(fields['fs'])== '0.017' :694                df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min'); 695              elif  '%.3f'%(fields['fs'])== '1.000' :696                df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='S'); 697              df_ts_records_temp.TIME = pd.to_datetime(df_ts_records_temp.TIME)698            699              print('before appending: ')700            701              print( df_ts_records_temp);702              df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_temp, ignore_index=True);703              print('inserting nulls in start + SECOND record data')704              print(df_ts_indv_record_temp)705              if '%.3f'%(fields['fs'])== '1.000' : #safiya706                start_idx = 0;707                df_ts_records_new = pd.DataFrame(columns=df_ts_records_columns);708                #print('length of new df  '  , df_ts_records_new.shape[0] )709                for index, rows in 
df_ts_indv_record_temp.iterrows():710                  if start_idx >= df_ts_indv_record_temp.shape[0]:711                    exit;712                  else: 713                    714                    #print(df_ts_records.iloc[start_idx: (start_idx+60), 2:8])715                    array = np.array( df_ts_indv_record_temp.iloc[start_idx: (start_idx+60), 2:8].mean(axis=0))716                    #print('printing array of average')717                    #print (array)718                    current_index = df_ts_records_new.shape[0]719                    df_ts_records_new.loc[current_index ,'HR']= array[0]720                    df_ts_records_new.loc[current_index,'SPO2']= array[1]721                    df_ts_records_new.loc[current_index,'ABPSYS']= array[2]722                    df_ts_records_new.loc[current_index,'ABPDIAS']= array[3]723                    df_ts_records_new.loc[current_index,'ABPMEAN']= array[4]724                    df_ts_records_new.loc[current_index,'RESP']= array[5]725                    #print(df_ts_records_new)726                    #print('next average')727                    start_idx = start_idx+60;728                    #print('start index :: ' , start_idx)729                print('# record time:  ',df_ts_records_new.shape[0])730                df_ts_records_new['TIME'] = pd.date_range(df_ts_indv_record_temp.loc[0,'TIME'], periods= df_ts_records_new.shape[0], freq='1min'); 731                df_ts_records_new.TIME = pd.to_datetime(df_ts_records_new.TIME)732                #print(df_ts_records_new)733                df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True);734                #df_ts_records = pd.DataFrame(columns=df_ts_records_columns)735                df_ts_records = df_ts_records.append(df_ts_records_new, ignore_index=True);736                737                print('only first record  aggregated at individual record level: ')738                print(df_ts_records_new)739                print('inserting 
aggregated first record into  FINAL SUBJEC DATAFRAME')740                print(df_ts_records)741                df_ts_records_new.drop(df_ts_records_new.index, inplace=True)742                df_ts_records['RECORD'] = record   743              else:744                df_ts_records = df_ts_records.append(df_ts_indv_record_temp, ignore_index=True);745                df_ts_records['RECORD'] = record   746                print('inserting nulls in start + first record data into FINAL SUBJEC DATAFRAME')747                print(df_ts_records)748           749              750        count_overlap = count_overlap +1751        print('overlap count after all insertions: ', count_overlap )752      else:753        print('Either all 6 signals not exists or there is no overlapt with recording time and ICU in time and out time')754last_record_idx = df_ts_records.shape[0] - 1755all_records_end_time = df_ts_records.loc[last_record_idx,'TIME']756      757if (all_records_end_time < icu_outtime  ):758  #print('INSERTING NULLS AT THE END')759  try:760    df_ts_records_time_temp_end.drop(df_ts_records_time_temp_end.index, inplace=True)761  except:762    print('df_ts_records_time_temp_end does not exists')763  #print('main DF last time record: ',last_Record_time )764  if '%.3f'%(fields['fs'])== '0.017' :765    minutes_to_insert_end =  (datetime.datetime.strptime((icu_outtime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M')) - (datetime.datetime.strptime((all_records_end_time.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M')) 766  elif '%.3f'%(fields['fs'])== '1.000' :767    minutes_to_insert_end = icu_outtime - all_records_end_time;768  duration_in_s = minutes_to_insert_end.total_seconds()769  minutes_to_insert_end = divmod(duration_in_s, 60)[0] - 1770  df_ts_records_time_temp_end = pd.DataFrame(columns=df_ts_records_columns)771        772  df_ts_records_time_temp_end['TIME'] = pd.date_range(all_records_end_time + datetime.timedelta(minutes=1), 773                                            
                  periods=minutes_to_insert_end, freq='1min'); 774  df_ts_records = df_ts_records.append(df_ts_records_time_temp_end, ignore_index=True);775      776  df_ts_records['RECORD'] = record777print('printing final data for this patient')778print(df_ts_records)779#print(df_ts_records)780df_ts_records.to_csv ('SampleRecordWith6SigalsExtracted.csv', sep=',', index = False, header=True);781from google.colab import files...12_after_caculation_of_shock_onsettimeto_check_waveforms_and_plot_for_sample_test_patients.py
Source:12_after_caculation_of_shock_onsettimeto_check_waveforms_and_plot_for_sample_test_patients.py  
# -*- coding: utf-8 -*-
"""12_after caculation of shock onsettimeTo check waveforms and plot for sample test patients.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1AmJmUX_2wyz2vJcyoXaXQnHin75KLwyR

Purpose: for one sample MIMIC-III subject, download every matched-waveform
"numerics" record (record names ending in 'n'), keep only the six required
vital signs (HR, SpO2, ABP sys/dias/mean, RESP), align them on a
once-per-minute time grid spanning the ICU stay (padding gaps with null rows
and averaging 1 Hz records down to one sample per minute), then plot an
8-hour window around the patient's sepsis onset time.
"""
# NOTE(review): '!pip install wfdb' is IPython shell magic and is a syntax
# error in a plain .py file; run it in the notebook before executing this.
# !pip install wfdb
import io
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import os
import shutil
import posixpath
import wfdb
import urllib.request
import datetime
from collections import namedtuple
import seaborn as sns
import matplotlib.ticker as ticker
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates

#### IMPORTANT ! DO NOT DELETE
# To get patient records if multiple ts exists.
# (The duplicated second import block from the original notebook cells has
# been merged into the single block above; no behavior change.)

# Output schema: one row per minute (or per second, before aggregation).
df_ts_records_columns = ['RECORD', 'TIME', 'HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP']
df_ts_records = pd.DataFrame(columns=df_ts_records_columns)

SIGNAL_COLS = ['HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP']


def _aggregate_to_minutes(df_seconds, columns):
    """Average each consecutive 60-row (i.e. 60-second) chunk of the six
    signal columns (frame columns 2..7) down to a single per-minute row.

    Returns a new frame with one row per minute and a TIME column starting
    at the first timestamp of *df_seconds*.
    """
    df_minutes = pd.DataFrame(columns=columns)
    start_idx = 0
    # The original code iterated with iterrows() and a dead 'exit' statement;
    # a simple while-loop over 60-row chunks is equivalent.
    while start_idx < df_seconds.shape[0]:
        chunk_mean = np.array(df_seconds.iloc[start_idx:(start_idx + 60), 2:8].mean(axis=0))
        row = df_minutes.shape[0]
        for col_pos, col in enumerate(SIGNAL_COLS):
            df_minutes.loc[row, col] = chunk_mean[col_pos]
        start_idx = start_idx + 60
    df_minutes['TIME'] = pd.date_range(df_seconds.loc[0, 'TIME'],
                                       periods=df_minutes.shape[0], freq='1min')
    df_minutes.TIME = pd.to_datetime(df_minutes.TIME)
    return df_minutes


# Sample subjects:
# 66965: only sepsis, no septic shock
# 51871: both sepsis and septic shock 07/01/2162 15:30  12/01/2162 02:56
# 61619: 28/11/2121 21:04  03/12/2121 15:55  28/11/2121 22:23  28/11/2121 21:23  29/11/2121 05:59
subject_id = 61619
icu_intime = datetime.datetime(2121, 11, 28, 21, 4, 0)
# NOTE(review): the reference row above lists out-time 03/12/2121 15:55 but
# the code uses 04:15:55 -- confirm which is intended.
icu_outtime = datetime.datetime(2121, 12, 3, 4, 15, 55)
print('icu intime =', icu_intime)
print('icu outtime', icu_outtime)
"""
subject_id= 59864;
icu_intime = datetime.datetime(2173, 5, 16, 12, 14,45)
print ('icu intime =', icu_intime)
icu_outtime = datetime.datetime(2173, 6, 8, 15, 45,23)
print ('icu outtime', icu_outtime)
#2173-05-16 12:14:45,2173-06-08 15:45:23,
"""

# Matched-waveform directory for this subject, e.g. p06/p061619/.
wdb_dir_path = 'mimic3wdb/matched/p' + str(subject_id).zfill(6)[:2] + '/p' + str(subject_id).zfill(6) + '/'
wdb_path_toAllRecords = 'https://archive.physionet.org/physiobank/database/' + wdb_dir_path + 'RECORDS'
wdb_records = urllib.request.urlopen(wdb_path_toAllRecords)

# Start from an empty accumulator even on notebook re-runs.
try:
    df_ts_records.drop(df_ts_records.index, inplace=True)
except NameError:
    print('df_ts_records does not exist')
count_overlap = 0  # number of records already merged for this subject

# NOTE(review): DataFrame.append was removed in pandas 2.0; this script
# targets the older pandas available in Colab at the time it was written.
for lines in wdb_records.readlines():
    record = lines.decode('utf-8')
    record = str(record).rstrip()
    # Numerics records (per-second/minute vitals) end in 'n'; skip waveforms.
    if record[-1:] == 'n':
        print(record)
        signals, fields = wfdb.rdsamp(record, pn_dir=wdb_dir_path)

        # Normalise signal names ('%SpO2', 'ABP SYS', ...) before matching.
        list_sig_name = [item.upper().replace(' ', '') for item in fields['sig_name']]
        sig_exist_1 = all(x in list_sig_name for x in ['HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP'])
        sig_exist_2 = all(x in list_sig_name for x in ['HR', '%SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP'])
        record_starttime = datetime.datetime.combine(fields['base_date'], fields['base_time'])

        # fs == 1.000 -> one sample per second; fs == 0.017 -> one per minute.
        if '%.3f' % (fields['fs']) == '1.000':
            record_endtime = record_starttime + datetime.timedelta(seconds=(fields['sig_len'] - 1))
        elif '%.3f' % (fields['fs']) == '0.017':
            record_endtime = record_starttime + datetime.timedelta(minutes=(fields['sig_len'] - 1))
        else:
            print('ERROR IN SAMPLING')
            print(record)
            print(wdb_dir_path)
        print('record START time:  ', record_starttime)
        print('record END time:  ', record_endtime)

        # Overlap (whole days, inclusive) of the ICU stay and this record.
        Range = namedtuple('Range', ['start', 'end'])
        r1 = Range(start=icu_intime, end=icu_outtime)
        r2 = Range(start=record_starttime, end=record_endtime)
        latest_start = max(r1.start, r2.start)
        earliest_end = min(r1.end, r2.end)
        delta = (earliest_end - latest_start).days + 1
        print('sig_exist_1 : ', sig_exist_1)
        print('sig_exist_2 : ', sig_exist_2)
        print('delta : ', delta)

        # NOTE(review): unlike the batch-extraction script, the overlap test
        # '(delta >= 0)' is deliberately commented out here, so every record
        # containing all six signals is kept.
        if sig_exist_1 or sig_exist_2:  # and (delta >= 0)
            # Fresh per-record staging frame.
            try:
                df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True)
            except NameError:
                print('individual record for a single patient df does not exists')
            df_ts_indv_record_temp = pd.DataFrame(columns=df_ts_records_columns)

            df_row_idx = df_ts_records.shape[0]
            print('length of signal: ', len(signals))
            print('index of dataframe before inserting into it: ', df_row_idx)

            # Column index of each required signal inside this record.
            for i in fields['sig_name']:
                if i.upper().replace(' ', '') == 'HR':
                    idx_HR = fields['sig_name'].index(i)
                elif (i.upper().replace(' ', '') == 'SPO2') or (i.upper().replace(' ', '') == '%SPO2'):
                    idx_SPO2 = fields['sig_name'].index(i)
                elif i.upper().replace(' ', '') == 'ABPSYS':
                    idx_ABPSYS = fields['sig_name'].index(i)
                elif i.upper().replace(' ', '') == 'ABPDIAS':
                    idx_ABPDIAS = fields['sig_name'].index(i)
                elif i.upper().replace(' ', '') == 'ABPMEAN':
                    idx_ABPMEAN = fields['sig_name'].index(i)
                elif i.upper().replace(' ', '') == 'RESP':
                    idx_RESP = fields['sig_name'].index(i)

            if count_overlap == 0:
                # First record: pad with null rows from ICU admission up to
                # the record start, then stage the record itself.
                if record_starttime > icu_intime:
                    print('inserting nulls before the record start time')
                    if '%.3f' % (fields['fs']) == '0.017':
                        # Minute-resolution records: compare at minute precision.
                        minutes_to_insert_start = (datetime.datetime.strptime(record_starttime.strftime('%Y-%m-%d %H:%M'), '%Y-%m-%d %H:%M')
                                                   - datetime.datetime.strptime(icu_intime.strftime('%Y-%m-%d %H:%M'), '%Y-%m-%d %H:%M'))
                    elif '%.3f' % (fields['fs']) == '1.000':
                        minutes_to_insert_start = record_starttime - icu_intime
                    print('minutes_to_insert_start:  ', minutes_to_insert_start)
                    duration_in_s = minutes_to_insert_start.total_seconds()
                    minutes_to_insert_start = divmod(duration_in_s, 60)[0] - 1
                    try:
                        df_ts_records_time_temp_start.drop(df_ts_records_time_temp_start.index, inplace=True)
                    except NameError:
                        print('df_ts_records_time_temp_start does not exist')
                    df_ts_records_time_temp_start = pd.DataFrame(columns=df_ts_records_columns)
                    if '%.3f' % (fields['fs']) == '0.017':
                        df_ts_records_time_temp_start['TIME'] = pd.date_range(icu_intime + datetime.timedelta(minutes=1),
                                                                              periods=minutes_to_insert_start, freq='1min')
                    elif '%.3f' % (fields['fs']) == '1.000':
                        df_ts_records_time_temp_start['TIME'] = pd.date_range(icu_intime + datetime.timedelta(seconds=1),
                                                                              periods=(duration_in_s - 1), freq='S')
                    print('INSERTING ONLY NULL IN START:')
                    print(df_ts_records_time_temp_start)
                    df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_time_temp_start, ignore_index=True)
                    print('inserting nulls in start IN INDV LEVEL')
                    print(df_ts_indv_record_temp)

                # Stage this record's six signals with a per-sample time index.
                try:
                    df_ts_records_temp.drop(df_ts_records_temp.index, inplace=True)
                except NameError:
                    print('df_ts_records_temp does not exist')
                df_ts_records_temp = pd.DataFrame(columns=df_ts_records_columns)
                df_ts_records_temp['HR'] = signals[:, idx_HR]
                df_ts_records_temp['SPO2'] = signals[:, idx_SPO2]
                df_ts_records_temp['ABPSYS'] = signals[:, idx_ABPSYS]
                df_ts_records_temp['ABPDIAS'] = signals[:, idx_ABPDIAS]
                df_ts_records_temp['ABPMEAN'] = signals[:, idx_ABPMEAN]
                df_ts_records_temp['RESP'] = signals[:, idx_RESP]
                if '%.3f' % (fields['fs']) == '0.017':
                    df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min')
                elif '%.3f' % (fields['fs']) == '1.000':
                    df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='S')
                df_ts_records_temp.TIME = pd.to_datetime(df_ts_records_temp.TIME)
                df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_temp, ignore_index=True)
                print('inserting nulls in start + first record data')
                print(df_ts_indv_record_temp)

                if '%.3f' % (fields['fs']) == '1.000':
                    # 1 Hz record: average 60-second chunks down to 1/min.
                    print('AGGREGATING')
                    df_ts_records_new = _aggregate_to_minutes(df_ts_indv_record_temp, df_ts_records_columns)
                    print('# record time:  ', df_ts_records_new.shape[0])
                    df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True)
                    # BUGFIX: tag only the rows coming from THIS record; the
                    # original overwrote the whole RECORD column each pass.
                    df_ts_records_new['RECORD'] = record
                    df_ts_records = df_ts_records.append(df_ts_records_new, ignore_index=True)
                    print('only first record  aggregated at individual record level: ')
                    print(df_ts_records_new)
                    print('inserting aggregated first record into  FINAL SUBJEC DATAFRAME')
                    print(df_ts_records)
                    df_ts_records_new.drop(df_ts_records_new.index, inplace=True)
                else:
                    df_ts_indv_record_temp['RECORD'] = record
                    df_ts_records = df_ts_records.append(df_ts_indv_record_temp, ignore_index=True)
                    print('inserting nulls in start + first record data into FINAL SUBJEC DATAFRAME')
                    print(df_ts_records)
            else:
                # Subsequent record: pad the gap since the last merged sample,
                # then stage and merge this record the same way.
                if record_starttime <= icu_outtime:
                    last_Record_time = df_ts_records.loc[(df_row_idx - 1), 'TIME']
                    print('main DF last time record: ', last_Record_time)
                    if '%.3f' % (fields['fs']) == '0.017':
                        minutes_to_insert = (datetime.datetime.strptime(record_starttime.strftime('%Y-%m-%d %H:%M'), '%Y-%m-%d %H:%M')
                                             - datetime.datetime.strptime(last_Record_time.strftime('%Y-%m-%d %H:%M'), '%Y-%m-%d %H:%M'))
                    elif '%.3f' % (fields['fs']) == '1.000':
                        minutes_to_insert = record_starttime - last_Record_time
                    duration_in_s = minutes_to_insert.total_seconds()
                    minutes_to_insert = divmod(duration_in_s, 60)[0] - 1
                    print('minutes_to_insert:  ', minutes_to_insert)
                    print('seconds to insert: ', duration_in_s)
                    try:
                        df_ts_records_time_temp.drop(df_ts_records_time_temp.index, inplace=True)
                        df_ts_records_temp.drop(df_ts_records_temp.index, inplace=True)
                    except NameError:
                        print('df_ts_records_temp and df_ts_records_time_temp does not exits')
                    df_ts_records_time_temp = pd.DataFrame(columns=df_ts_records_columns)
                    if '%.3f' % (fields['fs']) == '0.017':
                        df_ts_records_time_temp['TIME'] = pd.date_range(last_Record_time + datetime.timedelta(minutes=1),
                                                                        periods=minutes_to_insert, freq='1min')
                    elif '%.3f' % (fields['fs']) == '1.000':
                        print('last record time', last_Record_time)
                        print('(duration_in_s-1)', (duration_in_s - 1))
                        df_ts_records_time_temp['TIME'] = pd.date_range(last_Record_time + datetime.timedelta(seconds=1),
                                                                        periods=(duration_in_s - 1), freq='S')
                    print('INSERTING ONLY NULL UNTILL NEXT RECORD START TIME:')
                    print(df_ts_records_time_temp)
                    df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_time_temp, ignore_index=True)
                    print('inserting nulls UNTILL NEXT RECORD START TIME INTO INDV LEVEL')
                    print(df_ts_indv_record_temp)

                    df_ts_records_temp = pd.DataFrame(columns=df_ts_records_columns)
                    df_ts_records_temp['HR'] = signals[:, idx_HR]
                    df_ts_records_temp['SPO2'] = signals[:, idx_SPO2]
                    df_ts_records_temp['ABPSYS'] = signals[:, idx_ABPSYS]
                    df_ts_records_temp['ABPDIAS'] = signals[:, idx_ABPDIAS]
                    df_ts_records_temp['ABPMEAN'] = signals[:, idx_ABPMEAN]
                    df_ts_records_temp['RESP'] = signals[:, idx_RESP]
                    if '%.3f' % (fields['fs']) == '0.017':
                        df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min')
                    elif '%.3f' % (fields['fs']) == '1.000':
                        df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='S')
                    df_ts_records_temp.TIME = pd.to_datetime(df_ts_records_temp.TIME)
                    print('before appending: ')
                    print(df_ts_records_temp)
                    df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_temp, ignore_index=True)
                    print('inserting nulls in start + SECOND record data')
                    print(df_ts_indv_record_temp)

                    if '%.3f' % (fields['fs']) == '1.000':
                        df_ts_records_new = _aggregate_to_minutes(df_ts_indv_record_temp, df_ts_records_columns)
                        print('# record time:  ', df_ts_records_new.shape[0])
                        df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True)
                        df_ts_records_new['RECORD'] = record
                        df_ts_records = df_ts_records.append(df_ts_records_new, ignore_index=True)
                        print('only first record  aggregated at individual record level: ')
                        print(df_ts_records_new)
                        print('inserting aggregated first record into  FINAL SUBJEC DATAFRAME')
                        print(df_ts_records)
                        df_ts_records_new.drop(df_ts_records_new.index, inplace=True)
                    else:
                        df_ts_indv_record_temp['RECORD'] = record
                        df_ts_records = df_ts_records.append(df_ts_indv_record_temp, ignore_index=True)
                        print('inserting nulls in start + first record data into FINAL SUBJEC DATAFRAME')
                        print(df_ts_records)

            count_overlap = count_overlap + 1
            print('overlap count after all insertions: ', count_overlap)
        else:
            print('Either all 6 signals not exists or there is no overlapt with recording time and ICU in time and out time')

# Pad with null rows from the last merged sample out to ICU discharge.
last_record_idx = df_ts_records.shape[0] - 1
all_records_end_time = df_ts_records.loc[last_record_idx, 'TIME']
if all_records_end_time < icu_outtime:
    try:
        df_ts_records_time_temp_end.drop(df_ts_records_time_temp_end.index, inplace=True)
    except NameError:
        print('df_ts_records_time_temp_end does not exists')
    if '%.3f' % (fields['fs']) == '0.017':
        minutes_to_insert_end = (datetime.datetime.strptime(icu_outtime.strftime('%Y-%m-%d %H:%M'), '%Y-%m-%d %H:%M')
                                 - datetime.datetime.strptime(all_records_end_time.strftime('%Y-%m-%d %H:%M'), '%Y-%m-%d %H:%M'))
    elif '%.3f' % (fields['fs']) == '1.000':
        minutes_to_insert_end = icu_outtime - all_records_end_time
    duration_in_s = minutes_to_insert_end.total_seconds()
    minutes_to_insert_end = divmod(duration_in_s, 60)[0] - 1
    df_ts_records_time_temp_end = pd.DataFrame(columns=df_ts_records_columns)
    df_ts_records_time_temp_end['TIME'] = pd.date_range(all_records_end_time + datetime.timedelta(minutes=1),
                                                        periods=minutes_to_insert_end, freq='1min')
    # Tag the trailing padding with the last record processed.
    df_ts_records_time_temp_end['RECORD'] = record
    df_ts_records = df_ts_records.append(df_ts_records_time_temp_end, ignore_index=True)

print('printing final data for this patient')
print(df_ts_records)
print(df_ts_records.columns)

# 8-hour window around sepsis onset (28/11/2121 21:23 / 29/11/2121 05:59).
patient_sepsis_onsettime = datetime.datetime(2121, 11, 29, 5, 59, 0)
df_ts_records.TIME = pd.to_datetime(df_ts_records.TIME)
df_test_around_sepsis = df_ts_records[(df_ts_records['TIME'] >= (patient_sepsis_onsettime - datetime.timedelta(hours=8)))
                                      & (df_ts_records['TIME'] <= (patient_sepsis_onsettime + datetime.timedelta(hours=8)))]

# Wide display settings so the frame prints fully in the notebook.
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)
pd.set_option('max_colwidth', 800)

df_test = df_test_around_sepsis
# df_test = df_ts_records[df_ts_records['SUBJECT_ID'] == 40241]  # 98930.
# sepsis onset time for subject id 40241: 2192-03-15 13:20:59.000000
# Non-septic subject ids: 55563, 72269
print(df_test.RECORD.unique())

# One subplot per vital sign; x-axis labelled hourly with full timestamps.
date_form = DateFormatter('%Y-%m-%d %H:%M:%S')
sns.set(rc={'figure.figsize': (20, 10)})
cols_plot = ['HR', 'SPO2', 'ABPSYS', 'ABPDIAS', 'ABPMEAN', 'RESP']
df_test = df_test.set_index('TIME')
ax = df_test[cols_plot].plot(marker='.', alpha=0.5, figsize=(50, 30), subplots=True, x_compat=True)
for i in ax:
    i.xaxis.set_major_formatter(date_form)
    i.xaxis.set_major_locator(mdates.HourLocator(interval=1))
Source:Fetching6signalsforASamplePatientID_55638.py  
1#### IMPORTANT ! DO NOT DELETE2#To get patient records if multiple ts existis 3!pip install wfdb4import io5import pandas as pd6from IPython.display import display7import matplotlib.pyplot as plt8%matplotlib inline9import numpy as np10import os11import shutil12import posixpath13import wfdb14import urllib.request15import datetime16from collections import namedtuple17df_ts_records_columns = ['RECORD','TIME','HR', 'SPO2','ABPSYS','ABPDIAS','ABPMEAN','RESP'] 18df_ts_records = pd.DataFrame(columns=df_ts_records_columns); 19#subject_id= 48149; # per second multiple 20#icu_intime = datetime.datetime(2127, 5, 25, 8, 34,39) # for 4814921#icu_outtime = datetime.datetime(2127, 6, 16, 1, 15,22) # for 4814922subject_id= 55638; # per minute 23icu_intime = datetime.datetime(2106, 11, 25, 12, 37,32) # for 5563824icu_outtime = datetime.datetime(2106, 11, 27, 10, 49,33) # for 5563825print ('icu intime =', icu_intime)26print ('icu outtime', icu_outtime)27"""28subject_id= 59864;29icu_intime = datetime.datetime(2173, 5, 16, 12, 14,45)30print ('icu intime =', icu_intime)31icu_outtime = datetime.datetime(2173, 6, 8, 15, 45,23)32print ('icu outtime', icu_outtime)33#2173-05-16 12:14:45,2173-06-08 15:45:23,34"""35wdb_dir_path = 'mimic3wdb/matched/p'+ str(subject_id).zfill(6)[:2] + '/p' + str(subject_id).zfill(6) + '/';36wdb_path_toAllRecords = 'https://archive.physionet.org/physiobank/database/'+ wdb_dir_path + 'RECORDS';37wdb_records =  urllib.request.urlopen(wdb_path_toAllRecords);  38try:39  df_ts_records.drop(df_ts_records.index, inplace=True)40except:41  print('df_ts_records does not exist')42count_overlap = 0; 43for lines in wdb_records.readlines():44    record = lines.decode("utf-8"); 45    record = str(record).rstrip()46    #print (record[-1:])47    if record[-1:] == 'n':48      print(record);49      #print (wdb_dir_path);50      record = str(record).rstrip()51      52      53      #try:54      signals =''55      fields = ''56      signals,fields = wfdb.rdsamp(record, 
pn_dir=wdb_dir_path) ; 57        58      list_sig_name = [item.upper().replace(' ','') for item in fields['sig_name']]59      sig_exist_1 = all(x in list_sig_name for x in ['HR', 'SPO2','ABPSYS','ABPDIAS','ABPMEAN','RESP']);  #%SpO260      sig_exist_2 = all(x in list_sig_name for x in ['HR', '%SPO2','ABPSYS','ABPDIAS','ABPMEAN','RESP']); 61      record_starttime = datetime.datetime.combine(fields['base_date'] ,fields['base_time'] ) ;62            63      if  '%.3f'%(fields['fs']) == '1.000' :64        record_endtime = record_starttime + datetime.timedelta(seconds= (fields['sig_len']-1)) ;65      elif '%.3f'%(fields['fs'])== '0.017' :66        record_endtime = record_starttime + datetime.timedelta(minutes = (fields['sig_len']-1)) ;67      else : 68        print('ERROR IN SAMPLING');69        print(record);70        print(wdb_dir_path);71      print('record START time:  ', record_starttime)72      print('record END time:  ', record_endtime)73      Range = namedtuple('Range', ['start', 'end'])74      r1 = Range(start= icu_intime, end= icu_outtime)75      r2 = Range(start= record_starttime, end = record_endtime)76      latest_start = max(r1.start, r2.start)77      earliest_end = min(r1.end, r2.end)78      delta = (earliest_end - latest_start).days + 179       #delta >= 0 :80      print('sig_exist_1 : ', sig_exist_1)81      print('sig_exist_2 : ', sig_exist_2)82      print('delta : ', delta)83      if ( ((sig_exist_1 == True) or (sig_exist_2 == True)) and (delta >= 0)):84        ###85        try:86          df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True)87        except:88          print('individual record for a single patient df does not exists')89          90        df_ts_indv_record_temp = pd.DataFrame(columns = df_ts_records_columns ) # individual record for a single patient #safiya91        ###92        df_row_idx = df_ts_records.shape[0] ;93        print('length of signal: ', len(signals))94        print('index of dataframe before inserting 
into it: ', df_row_idx)95         96        for i in fields['sig_name']:97          98          if i.upper().replace(' ','') == 'HR':99            idx_HR = fields['sig_name'].index(i);100          elif (( i.upper().replace(' ','') == 'SPO2') or (i.upper().replace(' ','') =='%SPO2')):101            idx_SPO2 = fields['sig_name'].index(i);102          elif i.upper().replace(' ','') == 'ABPSYS' :103            idx_ABPSYS = fields['sig_name'].index(i);104          elif i.upper().replace(' ','') == 'ABPDIAS' :105            idx_ABPDIAS = fields['sig_name'].index(i);106          elif i.upper().replace(' ','') == 'ABPMEAN' :107            idx_ABPMEAN = fields['sig_name'].index(i);108          elif i.upper().replace(' ','') == 'RESP' :109            idx_RESP = fields['sig_name'].index(i);110            111        112        113        if count_overlap == 0 : 114            if record_starttime > icu_intime:115              print('inserting nulls before the record start time')116              #print( (datetime.datetime.strptime((icu_intime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))  ) #+ datetime.timedelta(seconds= int(record_starttime.strftime('%S')))  )117              #print(icu_intime.strftime('%Y-%m-%d %H:%M'))118              if '%.3f'%(fields['fs'])== '0.017' :119                minutes_to_insert_start = (datetime.datetime.strptime((record_starttime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))- (datetime.datetime.strptime((icu_intime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))120              elif '%.3f'%(fields['fs'])==  '1.000' :121                minutes_to_insert_start = record_starttime - icu_intime122              print('minutes_to_insert_start:  ', minutes_to_insert_start)123              duration_in_s = minutes_to_insert_start.total_seconds()124              minutes_to_insert_start = divmod(duration_in_s, 60)[0] - 1 125              try:126                df_ts_records_time_temp_start.drop(df_ts_records_time_temp_start.index,  inplace=True)127    
          except :128                print( 'df_ts_records_time_temp_start does not exist')129              130              df_ts_records_time_temp_start = pd.DataFrame(columns=df_ts_records_columns)131              if '%.3f'%(fields['fs'])== '0.017' :132                df_ts_records_time_temp_start['TIME'] = pd.date_range(icu_intime + datetime.timedelta(minutes=1), 133                                                              periods=minutes_to_insert_start, freq='1min'); 134              elif '%.3f'%(fields['fs'])== '1.000' :135                df_ts_records_time_temp_start['TIME'] = pd.date_range(icu_intime + datetime.timedelta(seconds=1), 136                                                              periods= (duration_in_s-1), freq='S'); 137              print ('INSERTING ONLY NULL IN START:')138              print (df_ts_records_time_temp_start)139              df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_time_temp_start, ignore_index=True);140              print('inserting nulls in start IN INDV LEVEL')141              print(df_ts_indv_record_temp)142            try:143              df_ts_records_temp.drop(df_ts_records_temp.index,  inplace=True)144            except:145              print( 'df_ts_records_temp does not exist')146            df_ts_records_temp = pd.DataFrame(columns=df_ts_records_columns)147            df_ts_records_temp['HR']= signals[:,idx_HR ] 148            df_ts_records_temp['SPO2']= signals[:,idx_SPO2 ] 149            df_ts_records_temp['ABPSYS']= signals[:,idx_ABPSYS ] 150            df_ts_records_temp['ABPDIAS']= signals[:,idx_ABPDIAS ] 151            df_ts_records_temp['ABPMEAN']= signals[:,idx_ABPMEAN ] 152            df_ts_records_temp['RESP']= signals[:,idx_RESP ] 153            if '%.3f'%(fields['fs'])== '0.017' :154              df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min'); 155            elif '%.3f'%(fields['fs'])== '1.000' :156              
df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='S'); 157            df_ts_records_temp.TIME = pd.to_datetime(df_ts_records_temp.TIME)158            df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_temp, ignore_index=True); #safiya159            print('inserting nulls in start + first record data')160            print(df_ts_indv_record_temp)161            if '%.3f'%(fields['fs'])== '1.000' : #safiya162              print("AGGREGATING")163              start_idx = 0;164              df_ts_records_new = pd.DataFrame(columns=df_ts_records_columns);165              #print('length of new df  '  , df_ts_records_new.shape[0] )166              for index, rows in df_ts_indv_record_temp.iterrows():167                print('start index for first: ', start_idx)168                if start_idx >= df_ts_indv_record_temp.shape[0]:169                  exit;170                else: 171                  172                  #print(df_ts_records.iloc[start_idx: (start_idx+60), 2:8])173                  array = np.array( df_ts_indv_record_temp.iloc[start_idx: (start_idx+60), 2:8].mean(axis=0))174                  #print('printing array of average')175                  #print (array)176                  current_index = df_ts_records_new.shape[0]177                  df_ts_records_new.loc[current_index ,'HR']= array[0]178                  df_ts_records_new.loc[current_index,'SPO2']= array[1]179                  df_ts_records_new.loc[current_index,'ABPSYS']= array[2]180                  df_ts_records_new.loc[current_index,'ABPDIAS']= array[3]181                  df_ts_records_new.loc[current_index,'ABPMEAN']= array[4]182                  df_ts_records_new.loc[current_index,'RESP']= array[5]183                  #print(df_ts_records_new)184                  #print('next average')185                  start_idx = start_idx+60;186                  #print('start index :: ' , start_idx)187              print('# record time:  
',df_ts_records_new.shape[0])188              df_ts_records_new['TIME'] = pd.date_range(df_ts_indv_record_temp.loc[0,'TIME'], periods= df_ts_records_new.shape[0], freq='1min'); 189              df_ts_records_new.TIME = pd.to_datetime(df_ts_records_new.TIME)190              #print(df_ts_records_new)191              df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True);192              #df_ts_records = pd.DataFrame(columns=df_ts_records_columns)193              df_ts_records = df_ts_records.append(df_ts_records_new, ignore_index=True);194              print('only first record  aggregated at individual record level: ')195              print(df_ts_records_new)196              print('inserting aggregated first record into  FINAL SUBJEC DATAFRAME')197              print(df_ts_records)198              df_ts_records_new.drop(df_ts_records_new.index, inplace=True)199              df_ts_records['RECORD'] = record   200            else:201              df_ts_records = df_ts_records.append(df_ts_indv_record_temp, ignore_index=True);202              df_ts_records['RECORD'] = record   203              print('inserting nulls in start + first record data into FINAL SUBJEC DATAFRAME')204              print(df_ts_records)205           206               207        else:208            if record_starttime <= icu_outtime :209              last_Record_time = df_ts_records.loc[(df_row_idx-1),'TIME']210              print('main DF last time record: ',last_Record_time )211              if '%.3f'%(fields['fs'])== '0.017' :212                minutes_to_insert = (datetime.datetime.strptime((record_starttime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M')) - (datetime.datetime.strptime((last_Record_time.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M'))213              elif '%.3f'%(fields['fs'])== '1.000' :214                minutes_to_insert = record_starttime - last_Record_time215              duration_in_s = minutes_to_insert.total_seconds()216              minutes_to_insert 
= divmod(duration_in_s, 60)[0] - 1217              print ('minutes_to_insert:  ', minutes_to_insert);218              print('seconds to insert: ', duration_in_s)219              try:220                df_ts_records_time_temp.drop(df_ts_records_time_temp.index, inplace=True);221                df_ts_records_temp.drop(df_ts_records_temp.index, inplace=True);222              except:223                print ('df_ts_records_temp and df_ts_records_time_temp does not exits')224              df_ts_records_time_temp = pd.DataFrame(columns=df_ts_records_columns)225              if '%.3f'%(fields['fs'])== '0.017' :226                df_ts_records_time_temp['TIME'] = pd.date_range(last_Record_time + datetime.timedelta(minutes=1), 227                                                              periods=minutes_to_insert, freq='1min'); 228              elif  '%.3f'%(fields['fs'])== '1.000' :229                print('last record time' , last_Record_time)230                print('(duration_in_s-1)' , (duration_in_s-1))231                df_ts_records_time_temp['TIME'] = pd.date_range(last_Record_time + datetime.timedelta(seconds=1), 232                                                              periods=(duration_in_s-1), freq='S'); 233              print ('INSERTING ONLY NULL UNTILL NEXT RECORD START TIME:')234              print (df_ts_records_time_temp)235              df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_time_temp, ignore_index=True);236              print('inserting nulls UNTILL NEXT RECORD START TIME INTO INDV LEVEL')237              print(df_ts_indv_record_temp)238              df_ts_records_temp = pd.DataFrame(columns=df_ts_records_columns)239          240              df_ts_records_temp['HR']= signals[:,idx_HR ] 241              df_ts_records_temp['SPO2']= signals[:,idx_SPO2 ] 242              df_ts_records_temp['ABPSYS']= signals[:,idx_ABPSYS ] 243              df_ts_records_temp['ABPDIAS']= signals[:,idx_ABPDIAS ] 244              
df_ts_records_temp['ABPMEAN']= signals[:,idx_ABPMEAN ] 245              df_ts_records_temp['RESP']= signals[:,idx_RESP ] 246              if '%.3f'%(fields['fs'])== '0.017' :247                df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='1min'); 248              elif  '%.3f'%(fields['fs'])== '1.000' :249                df_ts_records_temp['TIME'] = pd.date_range(record_starttime, periods=fields['sig_len'], freq='S'); 250              df_ts_records_temp.TIME = pd.to_datetime(df_ts_records_temp.TIME)251            252              print('before appending: ')253            254              print( df_ts_records_temp);255              df_ts_indv_record_temp = df_ts_indv_record_temp.append(df_ts_records_temp, ignore_index=True);256              print('inserting nulls in start + SECOND record data')257              print(df_ts_indv_record_temp)258              if '%.3f'%(fields['fs'])== '1.000' : #safiya259                start_idx = 0;260                df_ts_records_new = pd.DataFrame(columns=df_ts_records_columns);261                #print('length of new df  '  , df_ts_records_new.shape[0] )262                for index, rows in df_ts_indv_record_temp.iterrows():263                  if start_idx >= df_ts_indv_record_temp.shape[0]:264                    exit;265                  else: 266                    267                    #print(df_ts_records.iloc[start_idx: (start_idx+60), 2:8])268                    array = np.array( df_ts_indv_record_temp.iloc[start_idx: (start_idx+60), 2:8].mean(axis=0))269                    #print('printing array of average')270                    #print (array)271                    current_index = df_ts_records_new.shape[0]272                    df_ts_records_new.loc[current_index ,'HR']= array[0]273                    df_ts_records_new.loc[current_index,'SPO2']= array[1]274                    df_ts_records_new.loc[current_index,'ABPSYS']= array[2]275                    
df_ts_records_new.loc[current_index,'ABPDIAS']= array[3]276                    df_ts_records_new.loc[current_index,'ABPMEAN']= array[4]277                    df_ts_records_new.loc[current_index,'RESP']= array[5]278                    #print(df_ts_records_new)279                    #print('next average')280                    start_idx = start_idx+60;281                    #print('start index :: ' , start_idx)282                print('# record time:  ',df_ts_records_new.shape[0])283                df_ts_records_new['TIME'] = pd.date_range(df_ts_indv_record_temp.loc[0,'TIME'], periods= df_ts_records_new.shape[0], freq='1min'); 284                df_ts_records_new.TIME = pd.to_datetime(df_ts_records_new.TIME)285                #print(df_ts_records_new)286                df_ts_indv_record_temp.drop(df_ts_indv_record_temp.index, inplace=True);287                #df_ts_records = pd.DataFrame(columns=df_ts_records_columns)288                df_ts_records = df_ts_records.append(df_ts_records_new, ignore_index=True);289                290                print('only first record  aggregated at individual record level: ')291                print(df_ts_records_new)292                print('inserting aggregated first record into  FINAL SUBJEC DATAFRAME')293                print(df_ts_records)294                df_ts_records_new.drop(df_ts_records_new.index, inplace=True)295                df_ts_records['RECORD'] = record   296              else:297                df_ts_records = df_ts_records.append(df_ts_indv_record_temp, ignore_index=True);298                df_ts_records['RECORD'] = record   299                print('inserting nulls in start + first record data into FINAL SUBJEC DATAFRAME')300                print(df_ts_records)301           302              303        count_overlap = count_overlap +1304        print('overlap count after all insertions: ', count_overlap )305      else:306        print('Either all 6 signals not exists or there is no overlapt with recording 
time and ICU in time and out time')307last_record_idx = df_ts_records.shape[0] - 1308all_records_end_time = df_ts_records.loc[last_record_idx,'TIME']309      310if (all_records_end_time < icu_outtime  ):311  #print('INSERTING NULLS AT THE END')312  try:313    df_ts_records_time_temp_end.drop(df_ts_records_time_temp_end.index, inplace=True)314  except:315    print('df_ts_records_time_temp_end does not exists')316  #print('main DF last time record: ',last_Record_time )317  if '%.3f'%(fields['fs'])== '0.017' :318    minutes_to_insert_end =  (datetime.datetime.strptime((icu_outtime.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M')) - (datetime.datetime.strptime((all_records_end_time.strftime('%Y-%m-%d %H:%M' )), '%Y-%m-%d %H:%M')) 319  elif '%.3f'%(fields['fs'])== '1.000' :320    minutes_to_insert_end = icu_outtime - all_records_end_time;321  duration_in_s = minutes_to_insert_end.total_seconds()322  minutes_to_insert_end = divmod(duration_in_s, 60)[0] - 1323  df_ts_records_time_temp_end = pd.DataFrame(columns=df_ts_records_columns)324        325  df_ts_records_time_temp_end['TIME'] = pd.date_range(all_records_end_time + datetime.timedelta(minutes=1), 326                                                              periods=minutes_to_insert_end, freq='1min'); 327  df_ts_records = df_ts_records.append(df_ts_records_time_temp_end, ignore_index=True);328      329  df_ts_records['RECORD'] = record330print('printing final data for this patient')...spl_recorder_hd.py
Source: spl_recorder_hd.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# HD recorder plugin for schnipsl: records movies (immediately) and streams
# (time-scheduled) into local video files via ffmpeg/curl worker threads.
# NOTE(review): reconstructed from a line-number-mangled paste; runtime strings
# and control flow are kept as in the original except where commented.
from directorymapper import DirectoryMapper
from splthread import SplThread
from messagehandler import Query
from classes import MovieInfo
import defaults
from defaults import Record_States
from scheduler import Scheduler
from jsonstorage import JsonStorage
import json
import os
import sys
import time
import threading
import base64
import subprocess
from urllib.parse import urlparse
# Standard module
# Non standard modules (install with pip)
# own local modules
ScriptPath = os.path.realpath(os.path.join(
	os.path.dirname(__file__), "../../../common"))
# Add the directory containing your module to the Python path (wants absolute paths)
sys.path.append(os.path.abspath(ScriptPath))
import schnipsllogger
logger = schnipsllogger.getLogger(__name__)


class SplPlugin(SplThread):
	plugin_id = 'record_hd'
	plugin_names = ['HD Recorder']

	def __init__(self, modref):
		'''inits the plugin'''
		self.modref = modref
		# do the plugin specific initialisation first
		self.origin_dir = os.path.dirname(__file__)
		self.config = JsonStorage(self.plugin_id, 'backup', "config.json", {'path': DirectoryMapper.abspath(self.plugin_id, 'videos', '', True), 'www-root': 'http://schnipsl:9092/'})
		self.records = JsonStorage(self.plugin_id, 'runtime', "records.json", {})
		# thread pointers are kept separate from self.records, as they can't be stored as json
		self.record_threats = {}
		self.last_recorded_time = 0  # remembers how long ago the last recording action was
		# at last announce the own plugin
		super().__init__(modref.message_handler, self)
		modref.message_handler.add_event_handler(
			self.plugin_id, 0, self.event_listener)
		modref.message_handler.add_query_handler(
			self.plugin_id, 0, self.query_handler)
		self.runFlag = True

	def event_listener(self, queue_event):
		'''handles TIMER_RECORD_REQUEST events; every event is passed on'''
		if queue_event.type == defaults.TIMER_RECORD_REQUEST:
			self.timer_record_request(queue_event.data)
		# for further processing, do not forget to return the queue event
		return queue_event

	def query_handler(self, queue_event, max_result_count):
		'''answers QUERY_MOVIE_ID queries for records owned by this plugin'''
		if queue_event.type == defaults.QUERY_MOVIE_ID:
			new_uri = queue_event.params
			for record_movie in self.records.read('all', {}).values():  # 'all': read the whole config
				if record_movie['new_uri'] == new_uri:
					return [MovieInfo(
						source=self.plugin_names[0],
						source_type=defaults.MOVIE_TYPE_RECORD,
						provider=record_movie['new_uri'].split(':')[1],  # extracts the original provider back out of the uri
						category=record_movie['category'],
						title=record_movie['title'],
						timestamp=record_movie['timestamp'],
						duration=record_movie['duration'],
						description=record_movie['description'],
						url=record_movie['new_url'],
						mime=record_movie['mime']
					)]
		return []

	def _run(self):
		'''starts the server: periodically checks for due records and cleanups'''
		scheduler = Scheduler(
			[(self.check_for_records, 10), (self.cleanup_records, 60)])
		while self.runFlag:
			scheduler.execute()
			time.sleep(2)

	def _stop(self):
		self.runFlag = False

	def timer_record_request(self, data):
		'''registers a new record request, unless that uri is already known'''
		uri = data['uri']
		uuid = data['uuid']
		movie_info_list = self.modref.message_handler.query(
			Query(None, defaults.QUERY_MOVIE_ID, uri))
		if movie_info_list:
			movie_info = movie_info_list[0]
			uri = movie_info['uri']
			# do we have that record request already
			existing_record = self.records.read(uri)
			if not existing_record:
				uri_base64 = base64_encode(uri)
				ext = '.mp4'
				if movie_info['mime'] == 'video/MP2T':
					ext = '.mp4'
				file_path = DirectoryMapper.abspath('', 'videos', self.config.read('path') + uri_base64 + ext)
				if movie_info['source_type'] == defaults.MOVIE_TYPE_RECORD:
					self.records.write(uri, {
						# in case of a record we set start and duration to 0 to indicate that the recording can start immediately & has no duration
						'record_starttime': 0,
						'record_duration': 0,
						'provider': movie_info['provider'],
						'category': movie_info['category'],
						'title': movie_info['title'],
						'timestamp': movie_info['timestamp'],
						'duration': movie_info['duration'],
						'description': movie_info['description'],
						'url': movie_info['url'],
						'mime': movie_info['mime'],
						'uri': uri,
						'new_uri': self.plugin_names[0] + ':' + ':'.join(movie_info['uri'].split(':')[1:]),
						'new_url': self.config.read('www-root') + uri_base64 + ext,
						'uuid': uuid,
						'file_path': file_path,
						'state': Record_States.WAIT_FOR_RECORDING,
						'errorcount': 4  # try to start the record up to 4 times before it finally fails
					})
				if movie_info['source_type'] == defaults.MOVIE_TYPE_STREAM:
					# recording a stream with a duration of 0 is a very bad idea, because it would never stop..
					if movie_info['duration']:
						self.records.write(uri, {
							'record_starttime': movie_info['timestamp'],
							'record_duration': movie_info['duration'],
							'category': movie_info['category'],
							'title': movie_info['title'],
							'timestamp': movie_info['timestamp'],
							'duration': movie_info['duration'],
							'description': movie_info['description'],
							'url': movie_info['url'],
							'mime': movie_info['mime'],
							'uri': uri,
							'new_uri': self.plugin_names[0] + ':' + ':'.join(movie_info['uri'].split(':')[1:]),
							'new_url': self.config.read('www-root') + uri_base64 + ext,
							'uuid': uuid,
							'file_path': file_path,
							'state': Record_States.WAIT_FOR_RECORDING,
							'errorcount': 4  # try to start the record up to 4 times before it finally fails
						})

	def check_for_records(self):
		'''starts every waiting record whose start time (minus padding) has come'''
		act_time = time.time()
		for uri, record in self.records.read('all', '').items():
			if record['state'] == Record_States.WAIT_FOR_RECORDING:
				if record['record_duration'] == 0:  # this is a record, which can be recorded immediately
					record['state'] = Record_States.ACTUAL_RECORDING
					self.records.write(uri, record)
					self.recording(record)
					continue
				# something went wrong, the record time was in the past. Mark the entry as failed
				if record['record_starttime'] + record['record_duration'] < act_time:
					record['state'] = Record_States.RECORDING_FAILED
				# something went wrong during recording
				if record['state'] == Record_States.RECORDING_FAILED:
					self.records.write(uri, record)
					self.deploy_record_result(record, record['state'])
					continue
				# it's time to start
				if record['record_starttime'] - self.config.read('padding_secs', 300) <= act_time and record['record_starttime'] + record['record_duration'] > act_time:
					# in case the movie has already started, we correct starttime and duration to show the real values
					if record['record_starttime'] < act_time:
						record['starttime'] = str(act_time)
						record['duration'] = record['duration'] - (act_time - record['record_starttime'])
					record['state'] = Record_States.ACTUAL_RECORDING
					self.records.write(uri, record)
					self.recording(record)
					continue

	def cleanup_records(self):
		'''deploys finished recordings and deletes records no longer shown in the UI'''
		records_to_delete = {}
		act_time = time.time()
		# request which movies are still in the UI list
		valid_movieuri_list = self.modref.message_handler.query(
			Query(None, defaults.QUERY_VALID_MOVIE_RECORDS, {'source': self.plugin_names[0]}))
		for uri, record in self.records.config.items():
			if uri in self.record_threats:
				# recording is finished, so deploy the result
				if not self.record_threats[uri].is_alive():
					del self.record_threats[uri]  # we destroy the thread
					self.deploy_record_result(record,
						record['state'])
					self.last_recorded_time = act_time
			if self.last_recorded_time > act_time - 5 * 60:
				return  # don't do any delete action if the last record is just 5 mins ago to give the UI some time to adapt the new movie
			if record['state'] == Record_States.ACTUAL_RECORDING and not uri in self.record_threats:  # seems to be a zombie record
				records_to_delete[uri] = record
				self.deploy_record_result(record,
						Record_States.RECORDING_FAILED)
			if record['state'] == Record_States.RECORDING_FINISHED or record['state'] == Record_States.RECORDING_FAILED:
				new_uri = record['new_uri']
				#logger.info(f'Record on disk: {new_uri}')
				if not new_uri in valid_movieuri_list:
					records_to_delete[uri] = record
		# some debug output
		#for uri in valid_movieuri_list:
		#	logger.info(f'recoder uri: {uri}')
		if records_to_delete:
			# go through the list of records to be deleted
			for uri, record in records_to_delete.items():
				# delete the file
				file_path = record['file_path']
				logger.info(f'try to delete file {file_path}')
				if os.path.exists(file_path):
					try:
						os.remove(file_path)
						logger.info(f'file deleted {file_path}')
					except Exception as ex:
						logger.warning("Cant delete record file {0}. Error: {1}".format(file_path, str(ex)))
				else:
					# remove the entry
					logger.info(f'file not found, just remove the entry {uri}')
				del self.records.config[uri]
			self.records.save()

	def deploy_record_result(self, record, record_state):
		'''persists the record and broadcasts its final state to the UI'''
		# save changes
		self.records.write(record['uri'], record)
		self.modref.message_handler.queue_event(None, defaults.TIMER_RECORD_RESULT, {
			'new_uri': record['new_uri'], 'new_url': record['new_url'], 'uuid': record['uuid'], 'record_state': record_state})

	def recording(self, record):
		'''spawns a worker thread which does the actual recording'''
		uri = record['uri']
		logger.info(f'try to record {uri}')
		threat = threading.Thread(target=record_thread, args=(
			record, self.config.read('padding_secs', 300)))
		self.record_threats[uri] = threat
		threat.start()


def record_thread(record, padding_time):
	'''worker: downloads record['url'] into record['file_path'] and updates record['state']'''
	file_path = record['file_path']
	url = record['url']
	act_time = time.time()
	remaining_time = record['record_starttime'] + record['record_duration'] - act_time
	################ debug tweak to keep the records short - reduce the records to 30 secs.
	if False:
		remaining_time = 25
		padding_time = 5
	attr = None
	# does the record have a duration? then we use ffmpeg to limit the duration
	if record['record_duration']:
		# attr = ['ffmpeg', '-y', '-i', url, '-vcodec', 'copy', '-acodec', 'copy', 	'-map', '0:v', '-map', '0:a', '-t', str(remaining_time+padding_time), '-f', 'ts' , file_path]
		attr = ['ffmpeg', '-y', '-rw_timeout', '5000', '-i', url, '-vcodec', 'copy', '-acodec', 'copy', '-t', str(remaining_time + padding_time), file_path]
	else:
		attr = ['curl', '-s', url, '-o', file_path]  # process arguments
	if attr:
		logger.info(f"recorder started {repr(attr)}")
		try:
			completed_process = subprocess.run(attr)
			if completed_process.returncode:
				logger.warning("recorder ended with an error:\n%s" %
					  (completed_process.returncode))
				try:
					record['errorcount'] -= 1
				except (KeyError, TypeError):
					# older configs may not carry an errorcount yet; re-seed it
					record['errorcount'] = 4
				if record['errorcount'] <= 0:
					record['state'] = Record_States.RECORDING_FAILED
					logger.info("recorder max error count reached, recording finally failed")
				else:  # give it another try
					record['state'] = Record_States.WAIT_FOR_RECORDING
					logger.info(f"recorder error count {record['errorcount']}, try it again soon..")
			else:
				logger.info("recorder ended")
				record['state'] = Record_States.RECORDING_FINISHED
		except Exception as ex:
			logger.warning("recorder could not be started. Error: %s" % (ex))
	else:
		record['state'] = Record_States.RECORDING_FAILED


def base64_encode(string):
	"""
	Returns the URL-safe base64 of *string*.

	Removes any `=` used as padding from the encoded string.
	"""
	encoded = base64.urlsafe_b64encode(string.encode())
	return encoded.decode().replace('=', '')


def base64_decode(string):
	"""
	Adds back in the required padding before decoding.
	"""
	padding = 4 - (len(string) % 4)
	string = string + ("=" * padding)
	# NOTE(review): the original paste was truncated after the line above;
	# this return is the standard inverse of base64_encode — confirm against upstream.
	return base64.urlsafe_b64decode(string.encode()).decode()
Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!
