How to use attribute_values method in Selene

Best Python code snippet using selene_python

read_data.py

Source:read_data.py Github

copy

Full Screen

import data_prep


def _register_class(data_row, value_space_dict):
    """Add the row's 'class' value to value_space_dict['class'] if it is new."""
    if data_row['class'] not in value_space_dict['class']:
        value_space_dict['class'].append(data_row['class'])


def _read_kdd(file_path):
    """Parse a comma-separated KDD file into a list of row dicts.

    Returns:
        (dataset, value_space_dict): ``dataset`` is a list of dicts keyed by
        the names in ``attr_names``; ``value_space_dict`` lists the allowed
        values of the string-valued attributes, with the 'class' values
        collected from the file in order of first appearance.
    """
    dataset = []
    attr_names = [
        'duration',
        'protocol_type',
        'service',
        'flag',
        'src_bytes',
        'dst_bytes',
        'land',
        'wrong_fragment',
        'urgent',
        'hot',
        'num_failed_logins',
        'logged_in',
        'num_compromised',
        'root_shell',
        'su_attempted',
        'num_root',
        'num_file_creations',
        'num_shells',
        'num_access_files',
        'num_outbound_cmds',
        'is_host_login',
        'is_guest_login',
        'count',
        'srv_count',
        'serror_rate',
        'srv_serror_rate',
        'rerror_rate',
        'srv_rerror_rate',
        'same_srv_rate',
        'diff_srv_rate',
        'srv_diff_host_rate',
        'dst_host_count',
        'dst_host_srv_count',
        'dst_host_same_srv_rate',
        'dst_host_diff_srv_rate',
        'dst_host_same_src_port_rate',
        'dst_host_srv_diff_host_rate',
        'dst_host_serror_rate',
        'dst_host_srv_serror_rate',
        'dst_host_rerror_rate',
        'dst_host_srv_rerror_rate',
        'class'
    ]
    value_space_dict = {'protocol_type': ['tcp', 'udp', 'icmp'],
                        'service': ['aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard',
                                    'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i', 'efs', 'exec', 'finger', 'ftp',
                                    'ftp_data', 'gopher', 'harvest', 'hostnames', 'http', 'http_2784', 'http_443',
                                    'http_8001', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link',
                                    'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat',
                                    'nnsp', 'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2', 'pop_3', 'printer',
                                    'private', 'red_i', 'remote_job', 'rje', 'shell', 'smtp', 'sql_net', 'ssh',
                                    'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i',
                                    'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50'],
                        'flag': ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF', 'SH'],
                        'class': []}
    with open(file_path, 'r') as file:
        for line in file:
            # Skip blank lines (e.g. a trailing empty line), as the other
            # dataset readers do, instead of failing on the float conversion.
            if not line.strip():
                continue
            attribute_values = line.split(',')
            # Drop the trailing field of each row: it has no name in
            # attr_names, and it also carries the line terminator, so no
            # separate newline stripping is needed.
            attribute_values.pop()
            # Field 0 and fields 4..40 are numeric; fields 1..3 and the
            # class label at index 41 stay as strings.
            attribute_values[0] = float(attribute_values[0])
            for index in range(4, 41):
                attribute_values[index] = float(attribute_values[index])
            data_row = dict(zip(attr_names, attribute_values))
            _register_class(data_row, value_space_dict)
            dataset.append(data_row)
    return dataset, value_space_dict


def _read_iris(file_path):
    """Parse a comma-separated iris file into a list of row dicts.

    Returns:
        (dataset, value_space_dict) where ``value_space_dict['class']`` holds
        the class labels found in the file in order of first appearance.
    """
    attr_names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
    dataset = []
    value_space_dict = {'class' : []}
    with open(file_path, 'r') as file:
        for line in file:
            # Lines shorter than four characters are treated as empty.
            if len(line) < 4:
                continue
            # Strip only the line terminator. Slicing off the last character
            # unconditionally would truncate the class label on a final line
            # that has no trailing newline and would leave "\r" on CRLF files.
            attribute_values = line.rstrip('\r\n').split(',')
            for index in range(0, 4):
                attribute_values[index] = float(attribute_values[index])
            data_row = dict(zip(attr_names, attribute_values))
            _register_class(data_row, value_space_dict)
            dataset.append(data_row)
    return dataset, value_space_dict


def _read_nb15(file_path):
    """Parse a UNSW-NB15 CSV file into a list of row dicts with a binary class.

    Returns:
        (dataset, value_space_dict) where every row's 'class' is either
        "Normal" or "Intrusion".
    """
    attr_names = ["dur","proto","service","state","spkts","dpkts","sbytes","dbytes","rate","sttl","dttl","sload","dload","sloss","dloss","sinpkt","dinpkt","sjit","djit","swin","stcpb","dtcpb","dwin","tcprtt","synack","ackdat","smean","dmean","trans_depth","response_body_len","ct_srv_src","ct_state_ttl","ct_dst_ltm","ct_src_dport_ltm","ct_dst_sport_ltm","ct_dst_src_ltm","is_ftp_login","ct_ftp_cmd","ct_flw_http_mthd","ct_src_ltm","ct_srv_dst","is_sm_ips_ports","class"]
    dataset = []
    value_space_dict = {'class': ["Normal", "Intrusion"]}
    with open(file_path, 'r') as file:
        # The first line is always skipped (header row).
        next(file, None)
        for line in file:
            # Lines shorter than four characters are treated as empty.
            if len(line) < 4:
                continue
            attribute_values = line.split(',')
            # Drop the first field (the "id" column) and the last field of
            # the row; the last field also carries the line terminator.
            attribute_values.pop(0)
            attribute_values.pop()
            # Convert field 0 and fields 4..len-3 to floats. Fields 1..3 and
            # the last two remaining fields are left as strings.
            attribute_values[0] = float(attribute_values[0])
            for index in range(4, len(attribute_values) - 2):
                attribute_values[index] = float(attribute_values[index])
            data_row = dict(zip(attr_names, attribute_values))
            # Collapse every non-"Normal" label into a single class.
            if data_row['class'] != "Normal":
                data_row['class'] = "Intrusion"
            dataset.append(data_row)
    return dataset, value_space_dict


def read_dataset(dataset_key):
    """Load the dataset identified by ``dataset_key`` from its fixed path.

    Args:
        dataset_key: one of "KDD_train+", "KDD_test+", "iris_train",
            "iris_test", "NB15_train" or "NB15_test".

    Returns:
        A ``(dataset, value_space_dict)`` tuple, or ``None`` when
        ``dataset_key`` is not one of the keys above.
    """
    if dataset_key == "KDD_train+":
        return _read_kdd("res/mod_NSL/KDDTrain+.txt")
    if dataset_key == "KDD_test+":
        return _read_kdd("res/mod_NSL/KDDTest+.txt")
    if dataset_key == "iris_train":
        return _read_iris("res/iris_data/iris_train.data")
    if dataset_key == "iris_test":
        return _read_iris("res/iris_data/iris_test.data")
    if dataset_key == "NB15_train":
        return _read_nb15("res/UNSW-NB15/UNSW_NB15_training-set.csv")
    if dataset_key == "NB15_test":
        return _read_nb15("res/UNSW-NB15/UNSW_NB15_testing-set.csv")
    # Unknown keys yield None, exactly as before.
    return None


def calculate_intervals(dataset_key, dataset, value_space_dict):
    """Compute interval cut points for every float attribute of ``dataset``.

    Only the "KDD_train+" and "NB15_train" keys are supported; any other key
    prints an error and returns ``None``.

    Args:
        dataset_key: key naming the dataset being processed.
        dataset: non-empty list of row dicts; the first row decides which
            attributes are treated as floats.
        value_space_dict: updated in place so that each float attribute maps
            to its computed cuts.

    Returns:
        dict mapping each float attribute name to the result of
        ``data_prep.mdlp`` for that attribute, or ``None`` for an
        unsupported key.
    """
    if dataset_key == "KDD_train+" or dataset_key == "NB15_train":
        print("Interval calculations has begun.")
        float_attr = {}
        # Initiate lists for float attributes
        for attr in dataset[0]:
            if isinstance(dataset[0][attr], float):
                float_attr[attr] = []
        print("Floats initiated.")
        # Collect [value, class] pairs per float attribute
        for data_row in dataset:
            for attr in float_attr:
                float_attr[attr].append([data_row[attr], data_row['class']])
        print("Floats parsed to lists")
        # Calculate the interval cuts
        # NOTE(review): data_prep.mdlp is defined outside this file;
        # presumably an MDLP discretisation returning cut points - confirm.
        interval_cuts = {}
        for count_finished_attr, attr in enumerate(float_attr, start=1):
            interval_cuts[attr] = data_prep.mdlp(float_attr[attr])
            value_space_dict[attr] = interval_cuts[attr]
            print("Calculate intervals status: {} of {} attribute intervals created.".format(count_finished_attr, len(float_attr)))
        return interval_cuts
    else:
        print("Error: Unkown dataset key {} for interval calculation.".format(dataset_key))


def find_interval_value(data_value, interval_cuts):
    """Return the first cut that is >= ``data_value``, as a string.

    Cuts are checked in iteration order, so this presumably expects
    ``interval_cuts`` sorted ascending (verify against data_prep.mdlp).
    Returns "-1" when ``data_value`` exceeds every cut.
    """
    for cut in interval_cuts:
        if data_value <= cut:
            return str(cut)
    return str(-1)


def discretize_to_intervals(dataset, intervals):
    # NOTE: the source snippet is truncated at this point; the body of the
    # inner loop is not visible and is kept as the original "...".
    for data_row in dataset:
        for attr_name in intervals:...

Full Screen

Full Screen

alibaba_scrapper.py

Source:alibaba_scrapper.py Github

copy

Full Screen

import requests
from bs4 import BeautifulSoup
import bs4
import re
import pandas as pd


def init_attribute_holders(attributes_for_selection, attribute_values):
    """Create an empty value list in ``attribute_values`` for each attribute."""
    for attribute in attributes_for_selection:
        attribute_values[attribute] = []


def update_attributes(soup_element, attributes_for_selection, attribute_values):
    """Store the value of one product-detail element if it is a selected attribute.

    The attribute name is read from the ``title`` of the element's first
    ``span`` and the value from the ``title`` of its first ``div``.

    Returns:
        The attribute name found in ``soup_element``, whether or not it was
        one of ``attributes_for_selection``.
    """
    soup_attribute_name = soup_element.find('span')['title']
    for attribute_name in attributes_for_selection:
        if attribute_name == soup_attribute_name:
            soup_value = soup_element.find('div')['title']
            attribute_values[soup_attribute_name].append(soup_value)
    return soup_attribute_name


def fill_in_not_updated_attributes(not_updated_attributes, attribute_values):
    """Append ``None`` for every attribute that received no value."""
    for attribute in not_updated_attributes:
        attribute_values[attribute].append(None)


def scrap_item(link, attributes_for_selection, attribute_values):
    """Scrape one product page and append its image link and attributes.

    Nothing is appended when the page does not return HTTP 200 or has no
    main image element. Otherwise one entry is appended to "image_link" and
    to every selected attribute (``None`` where the page lacks it), keeping
    all value lists the same length.
    """
    page = requests.get(link)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    soup = BeautifulSoup(page.content, 'html.parser')
    # this selector gets the tag where the main image is located
    main_image_element = soup.select_one('img#J-dcv-image-trigger')
    # to check if the image element exists (some pages have video instead)
    if main_image_element is None:
        return
    attribute_values["image_link"].append(main_image_element["data-src"])
    # the following element contains the product details
    details = soup.select_one('div.do-entry-list')
    # select_one returns None when the page has no details list. Treat that
    # as "no attributes found" rather than failing after image_link has
    # already been appended, which would leave the lists misaligned.
    if details is None:
        details = ()
    not_updated_attributes = set(attributes_for_selection)
    for detail in details:
        # There are strings in the details list too; only tags are scraped
        if isinstance(detail, bs4.element.Tag):
            updated_attribute = update_attributes(detail, attributes_for_selection, attribute_values)
            if updated_attribute in not_updated_attributes:
                not_updated_attributes.remove(updated_attribute)
    # to make the length of every list of values the same for a
    # postprocessing step, attributes not present are filled in as well
    fill_in_not_updated_attributes(not_updated_attributes, attribute_values)
    return


def get_next_page_link(soup, current_page_number, seller_main_page):
    """Return the link to the next listing page, or ``None`` if there is none.

    ``None`` is returned when the page has no pagination element or when no
    displayed page number is higher than ``current_page_number``.
    """
    pagination_list = soup.select_one('div.next-pagination-list')
    # A listing without pagination has no further pages; returning None lets
    # the caller's "while ... is not None" loop terminate cleanly.
    if pagination_list is None:
        return None
    displayed_pages = pagination_list.select('a.next-pagination-item')
    for page in displayed_pages:
        # the next page number must be higher than the current one.
        # since page numbers are shown in ascending order, the first higher
        # number is the next page
        if int(page.text) > current_page_number:
            next_page_link = seller_main_page + page["href"]
            return next_page_link
    return None


def scrap_items(soup, attributes_for_selection, attribute_values, seller_main_page):
    """Scrape every product linked from one listing page."""
    product_list = soup.select_one('div.component-product-list')
    # No product list on the page means there is nothing to scrape.
    if product_list is None:
        return
    items = product_list.select('div.product-info')
    item_links = [seller_main_page + item.select_one('a.title-link')['href'] for item in items]
    for item_link in item_links:
        scrap_item(item_link, attributes_for_selection, attribute_values)


def scrap_items_page(item_list_link, seller_main_page, current_page_number, attributes_for_selection, attribute_values):
    """Scrape one listing page and return the link to the next one.

    Returns ``None`` when the page does not return HTTP 200 or there is no
    next page.
    """
    page = requests.get(item_list_link)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    soup = BeautifulSoup(page.content, 'html.parser')
    scrap_items(soup, attributes_for_selection, attribute_values, seller_main_page)
    next_page_link = get_next_page_link(soup, current_page_number, seller_main_page)
    return next_page_link


def scrap_init_seller_page(init_link, attributes_for_selection, attribute_values):
    """Scrape all listing pages of one seller, starting at ``init_link``.

    ``init_link`` must start with "https://"; the scheme and host are used
    as the prefix for the relative links found on the pages.
    """
    # This request only checks that the seller page is reachable; the page
    # is fetched again by scrap_items_page below.
    page = requests.get(init_link)
    # Raw string: "\w" in a normal string literal is an invalid escape.
    seller_main_page = re.search(r"https://[\w\.]+", init_link).group(0)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    # the initial page is the first page with items
    next_seller_page = init_link
    # initial page is 1
    current_page_number = 1
    while next_seller_page is not None:
        next_seller_page = scrap_items_page(next_seller_page, seller_main_page, current_page_number,
                                            attributes_for_selection, attribute_values)
        current_page_number += 1


def scrap_sellers(seller_links, tags_for_selection):
    """Scrape the selected attributes and image links for every seller link."""
    # to prevent the image link from being scraped as a text attribute, an
    # additional holder for the link is created
    all_attributes = tags_for_selection + ["image_link"]
    attribute_values = {}
    init_attribute_holders(all_attributes, attribute_values)
    # tags for selection don't include the image; it is processed separately
    for link in seller_links:
        scrap_init_seller_page(link, tags_for_selection, attribute_values)
    # NOTE: the source snippet is truncated here ("..."); the remainder of
    # this function, including whatever it returns, is not visible.

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run Selene automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful