How to use the process_company method in tox

Best Python code snippet using tox_python
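Despite the title, process_company is not an API provided by tox itself; in every snippet below it is application code defined by the project being quoted (a Scrapy pipeline, a stock-prediction script, and a small web scraper). What the snippets share is the pattern of a processor exposing a process_company hook that a caller dispatches to once per record. The following is a minimal, self-contained sketch of that pattern; BaseProcessor, ExampleProcessor and the field names are illustrative inventions and are not taken from any of the snippets.

from abc import ABC, abstractmethod

class BaseProcessor(ABC):
    """Processors turn a raw scraped dict into a structured record."""

    @classmethod
    @abstractmethod
    def process_company(cls, d, spider):
        ...

class ExampleProcessor(BaseProcessor):
    name = "example_site"

    @classmethod
    def process_company(cls, d, spider):
        # Keep only the fields a downstream pipeline would expect.
        return {
            "name": d.get("name"),
            "location": d.get("address"),
            "external_site": cls.name,
        }

# Dispatch per item, the way a pipeline's process_item() typically does.
record = ExampleProcessor.process_company(
    {"name": "Acme", "address": "Minsk"}, spider=None
)
print(record)  # {'name': 'Acme', 'location': 'Minsk', 'external_site': 'example_site'}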

pipelines.py

Source: pipelines.py (GitHub)


...
    def process_vacancy(cls, d, spider):
        pass

    @classmethod
    @abstractmethod
    def process_company(cls, d, spider):
        pass


class RabotaByProcessor(BaseProcessor):
    name = "rabota_by"
    vacancy_id_p = re.compile(r"(?<=/vacancy/)(?P<id>\d+)")
    posted_p = re.compile(r"(?P<day>\d\d?) (?P<month>\w+) (?P<year>\d{4})")
    month_mapping = dict(
        zip(
            (
                "января",
                "февраля",
                "марта",
                "апреля",
                "мая",
                "июня",
                "июля",
                "августа",
                "сентября",
                "октября",
                "ноября",
                "декабря",
            ),
            range(1, 13),
        )
    )
    salary_p = re.compile(
        r"(от (?P<min>\d+) )?(до (?P<max>\d+) )?(?P<currency>руб.|бел.руб.|USD|EUR)"
    )
    currency_mapping = {
        "USD": Currency.USD.value,
        "бел.руб.": Currency.BYN.value,
        "руб.": Currency.RUB.value,
        "EUR": Currency.EUR.value,
    }

    @classmethod
    def process_company(cls, d, spider):
        item = CompanyItem(
            location=d["address"],
            description=d["description"],
            external_logo_url=d["logo_url"],
            external_url=d["url"],
            external_site=cls.name,
        )
        if d["name"]:
            item["name"] = d["name"]
        return item

    @classmethod
    def process_vacancy(cls, d, spider):
        url = urlparse(d["url"])
        item = VacancyItem(
            title=d["title"],
            url=d["url"],
            site_type_name=spider.name,
            address=d["address"],
            experience=d["experience"],
            employment_mode=d["employment_mode"],
            description=d["description"],
        )
        item["vacancy_id"] = int(os.path.basename(urlparse(d["url"]).path))
        item["company_name"] = d["company_name"].replace("\xa0", " ")
        item["company_link"] = (
            d["company_link"]
            if not d["company_link"].startswith("/")
            else urlunparse((url.scheme, url.netloc, d["company_link"], "", "", ""))
        )
        salary_parsed = cls.salary_p.search(d["salary"].replace("\xa0", ""))
        if salary_parsed:
            item["currency"] = cls.currency_mapping[salary_parsed["currency"]]
            if salary_parsed["min"]:
                item["salary_min"] = int(salary_parsed["min"])
            if salary_parsed["max"]:
                item["salary_max"] = int(salary_parsed["max"])
        vacancy_id_parsed = cls.vacancy_id_p.search(url.path)
        if not vacancy_id_parsed:
            raise DropItem(
                f"Can't find vacancy id for url {d['url']} - {url.path}", logging.ERROR
            )
        else:
            item["vacancy_id"] = int(vacancy_id_parsed["id"])
        if d["skills"]:
            item["skills"] = ", ".join(sorted(d["skills"])).replace("\xa0", " ")
        posted_parsed = cls.posted_p.search(d["posted"].replace("\xa0", " "))
        if not posted_parsed:
            raise DropItem(
                f"Can't find posted date for url {d['url']} - {d['posted']}",
                logging.ERROR,
            )
        else:
            item["posted"] = date(
                day=int(posted_parsed["day"]),
                month=cls.month_mapping[posted_parsed["month"]],
                year=int(posted_parsed["year"]),
            )
        item["hash"] = item.get_hash()
        return item


class DevByProcessor(BaseProcessor):
    name = "dev_by"
    vacancy_id_p = re.compile(r"(?<=/vacancies/)(?P<id>\d+)")
    salary_p = re.compile(r"(от )?\$?(?P<min>\d+)?—?(до )?\$?(?P<max>\d+)?")
    e_count_p = re.compile(r"\d+")

    @classmethod
    def process_vacancy(cls, d, spider):
        url = urlparse(d["url"])
        item = VacancyItem(
            title=d["title"],
            url=d["url"],
            site_type_name=cls.name,
            company_name=d["company_name"],
            company_link=d["company_link"],
            description=d["description"],
        )
        options = {item[0][:-2]: item[1] for item in d["options"]}
        item["vacancy_id"] = int(os.path.basename(urlparse(d["url"]).path))
        if "Зарплата" in options:
            salary_parsed = cls.salary_p.search(options["Зарплата"])
            if salary_parsed:
                item["currency"] = Currency.USD.value
                if salary_parsed["max"]:
                    item["salary_max"] = int(salary_parsed["max"])
                if salary_parsed["min"]:
                    item["salary_min"] = int(salary_parsed["min"])
            else:
                raise DropItem(
                    f"Can't parse salary({options['Зарплата']}) for this url {d['url']}",
                    level=logging.ERROR,
                )
        vacancy_id_parsed = cls.vacancy_id_p.search(url.path)
        if not vacancy_id_parsed:
            raise DropItem(
                f"Can't find vacancy id for url {d['url']} - {url.path}", logging.ERROR
            )
        else:
            item["vacancy_id"] = int(vacancy_id_parsed["id"])
        if options.get("Уровень английского", "Не важно") != "Не важно":
            d["skills"].append(f"English - {options['Уровень английского']}")
        if d["skills"]:
            item["skills"] = ", ".join(
                sorted([item.capitalize() for item in sorted(d["skills"])])
            )
        for (option_name, out_name) in (
            ("Опыт", "experience"),
            ("Город", "address"),
            ("Режим работы", "employment_mode"),
        ):
            if options.get(option_name):
                item[out_name] = options[option_name]
        if options.get("Возможна удалённая работа", "Да") == "Да":
            prefix = (
                f'{item["employment_mode"]}, ' if item.get("employment_mode") else ""
            )
            item["employment_mode"] = prefix + "удалённая работа"
        item["hash"] = item.get_hash()
        return item

    @classmethod
    def process_company(cls, d, spider):
        for field in ("name", "description", "employees"):
            if not d[field]:
                raise DropItem(f"Can't extract {field} field", level=logging.ERROR)
        item = CompanyItem(
            name=d["name"],
            location=d["address"],
            description=d["description"],
            external_logo_url=d["logo_url"],
            external_url=d["url"],
            external_site=cls.name,
        )
        if item["external_logo_url"]:
            item["external_logo_url"] = item["external_logo_url"].replace(
                "pre_medium_white", "original"
            )
        if cls.e_count_p.search(d["employees"]):
            item["employee_count"] = int(cls.e_count_p.search(d["employees"]).group(0))
        return item


class MainPipeline:
    def process_item(self, item, spider):
        method = "process_" + ("vacancy" if spider.is_vacancy else "company")
        return getattr(eval(spider.processor), method)(item, spider)


class SaveDbPipeline:
    def __init__(self):
        self.currency_map = {
            item.value: CurrencyDjango.objects.get(name=item.value).id
            for item in Currency
        }
        self.site_type_map = {
            spider.name: SiteType.objects.get(name=spider.name).id
            for spider in (RabotaBySpider, DevBySpider)
        }

    def process_vacancy(self, item, spider):
        item["site_type_id"] = self.site_type_map[item.pop("site_type_name")]
        if "currency" in item:
            item["currency_id"] = self.currency_map[item.pop("currency")]
        item["is_internal"] = False
        company = Company.objects.get_or_create(
            external_site_id=item["site_type_id"], external_url=item["company_link"]
        )[0]
        if company.name != item["company_name"]:
            company.name = item["company_name"]
            company.save()
        item["company"] = company
        item.fill_defaults()
        try:
            vacancy = Vacancy.objects.get(
                site_type_id=item["site_type_id"], vacancy_id=item["vacancy_id"]
            )
        except Vacancy.DoesNotExist:
            vacancy = None
        if vacancy:
            if vacancy.hash.tobytes() == item["hash"]:
                raise DropItem(
                    f"{item['url']} has the same hash as in db",
                    level=logging.INFO,
                    override_msg=True,
                )
            for key in item:
                setattr(vacancy, key, item[key])
            vacancy.save(update_fields=list(item))
            spider.log(
                f"{item['url']} is already in db, updated some fields",
                level=logging.INFO,
            )
        else:
            spider.log(f"Added new vacancy {item['url']}", level=logging.INFO)
            Vacancy.objects.create(**item)

    def process_company(self, item, spider):
        spider.log(f"Updating company {item['external_url']}", level=logging.INFO)
        item['last_updated'] = datetime.datetime.utcnow()
        Company.objects.filter(
            external_site_id=self.site_type_map[item.pop("external_site")],
            external_url=item.pop("external_url"),
        ).update(**item)

    def process_item(self, item, spider):
        if spider.is_vacancy:
            self.process_vacancy(item, spider)
        else:
...
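
In this pipeline, process_company is only reached through MainPipeline.process_item: when spider.is_vacancy is False, the item is routed to the processor class named in spider.processor, and the resulting CompanyItem is later persisted by SaveDbPipeline.process_company. Below is a rough sketch of that flow with a made-up DummySpider and input dict; the dict keys simply mirror what RabotaByProcessor.process_company reads above, and the values are placeholders.

# Hypothetical stand-ins for illustration only; field names follow the snippet.
class DummySpider:
    name = "rabota_by"
    is_vacancy = False          # route to process_company, not process_vacancy
    processor = "RabotaByProcessor"

scraped = {
    "name": "Example Company",
    "address": "Минск",
    "description": "Example description",
    "logo_url": "https://example.com/logo.png",
    "url": "https://rabota.by/employer/123",
}

# MainPipeline performs the same dispatch via getattr(...):
item = RabotaByProcessor.process_company(scraped, DummySpider())
# item is a CompanyItem carrying name, location, description and external_*
# fields, ready for SaveDbPipeline.process_company to write to the database.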


predictor.py

Source: predictor.py (GitHub)


...
    num_days
@returns
    none
'''
def process_company(ticker, num_days, useSpread, useVolume):
    # initialize share with the company ticker
    try:
        company = Share(ticker)
    except (gaierror):
        print "\nNot connected!\n"
        sys.exit()

    day1, day2 = get_dates(num_days)

    historical = company.get_historical(day1, day2)
    if len(historical) is 0:
        print "Error! Please check your inputs and try again"
        # print len(historical), "Days of historical data"
    else:
        # reverse the list
        historical.reverse()
        # print len(historical)

        unscaled_opening = gh.get_unscaled_opening(historical)

        # --------------------------------#
        scaler = scale.get_scaler(unscaled_opening)

        # get training and target data
        training, target, scaled_training, scaled_target = gh.training_data(historical, company, scaler, useSpread, useVolume)

        # get current trading day's data
        this_day, scaled_today = td.get_trading_day(company, scaler, useSpread, useVolume)
        # --------------------------------------------------------------------#
        clf = svm.SVR(gamma=0.000001, C=1e3, kernel='rbf')  # gamma = 0.00000001 for 10 days
        # Fit takes in data (#_samples X #_of_features array), and target (closing - 1 X #_of_Sample_size array)
        clf.fit(scaled_training, scaled_target)

        # predict takes in today's
        predict = clf.predict(this_day)
        # print_info(company, ticker, predict)
        clf.fit(scaled_training, scaled_target)
        predict = clf.predict(scaled_today)
        # print predict
        pre = scaler.inverse_transform(predict)

        # print "Using scaled data"
        print_info(company, ticker, pre)
'''
'''
def gui_call(ticker, days, spreadV, volumeV):
    num_days = days

    useSpread = False
    useVolume = False
    useAverage = False
    if (spreadV == 1):
        useSpread = True
    if (volumeV == 1):
        useVolume = True
    DJIA = 'djia'
    if ticker.upper() == DJIA.upper():
        tickers = cn.get_djia_list()
        for i in range(len(tickers)):
            process_company(tickers[i], num_days, useSpread, useVolume)
    else:
        process_company(ticker, num_days, useSpread, useVolume)
'''
Main - driver of the program. Parses the command line arguments and calls process_company for the given stock (based on ticker).
If 'djia' is entered, calls process_company for all 30 companies in the Dow Jones Industrial Average.
'''
def main(args):
    ticker = args['<ticker>']
    num_days = args['<num_days>']
    num_days = int(num_days)

    DJIA = 'djia'

    if ticker.upper() == DJIA.upper():
        tickers = cn.get_djia_list()
        for i in range(len(tickers)):
            process_company(tickers[i], num_days, True, False)
    else:
        process_company(ticker, num_days, True, False)
'''
Calls Main.
Uses Docopt module to parse the command line arguments.
'''
if __name__ == '__main__':
    args = docopt(__doc__)
...
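
Here process_company is a plain module-level function: it takes a ticker, a day count and two feature flags, pulls historical quotes through Share(ticker).get_historical(...) (an old Yahoo Finance-style API that no longer works), fits an SVR and prints a prediction. The snippet targets Python 2, so the call sketch below is illustrative rather than runnable today; the ticker and day count are arbitrary examples.

# Single stock: ticker, number of days of history, useSpread, useVolume
process_company('AAPL', 30, True, False)

# Or, as main() does, every ticker in the Dow Jones Industrial Average
for t in cn.get_djia_list():
    process_company(t, 30, True, False)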


web_scrapping.py

Source: web_scrapping.py (GitHub)


import requests
from bs4 import BeautifulSoup
import re
import fnmatch
import pandas as pd
from urllib3 import request

# from urllib3 import pa

# list of websites to extract company names
website = ['http://ppprocessingltd.co.uk', 'http://www.feredaycylinder.com'
           , 'https://alphalaser.de'
           , 'https://www.onealmfgservices.com'
           , 'https://www.taupitz.biz'
           , 'http://www.bolducleroux.ca'
           , 'https://www.ferricmachinery.com'
           , 'http://www.jfe-steel.co.jp'
           , 'https://mewi-metallform.de'
           ]

# Based on inspecting each website, some of the key words found after inspecting

pattern = ['GmbH', 'Ltd', 'Limited', 'Spa', 'Machinery', 'Corporation', 'Leroux', 'Group', 'Inc']
# list of tags
tags = ['p', 'a', 'l1', 'h1', 'font', 'li', 'ul', 'td']

# Soup & REGEX libraries used to extract company names

def scrap_company(site):
    # r site in website1:
    print("website = ", site)
    data = requests.get(site)
    websoup = BeautifulSoup(data.text, 'html.parser')
    for tg in tags:
        # print('tag--> ', tg)
        for div in websoup.find_all(tg):
            raw_cmpny = div.text
            for ptn in pattern:

                # if fnmatch.fnmatch(raw_cmpny, '*' + ptn + '*'):
                # process_company = raw_cmpny.split(ptn)[0].strip()  # this will strip out pattern and also newline also whitespaces

                regx_expr = '.*' + ptn

                # '[\D|\s|\n|\t]*'+ptn  # '[A-z|a-z|\s|\&|\n|\t]*'+ptn
                # print('>', raw_cmpny)

                if re.match(regx_expr, raw_cmpny):
                    process_company = re.findall(regx_expr, raw_cmpny)[0]
                    try:
                        print(site, ':', process_company[-50:])
                    except:
                        print(site, ':', process_company)


def main():
    print('list of websites are -->\n', str(website))
    company_name = str(input("key in the website from the list above:"))
    if not re.match('[http://|https://].*', company_name):
        try:
            company_name = 'http://' + company_name
        except:
            company_name = 'https://' + company_name
    scrap_company(company_name)

# main class
if __name__ == '__main__':
    main()
...
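
Note that in this last snippet process_company is not a method at all; it is a local variable inside scrap_company holding the company-name text matched by the regular expression. If you wanted to sweep the snippet's predefined website list instead of prompting for a single URL, a small assumed extension could look like this:

# Illustrative loop over the snippet's hard-coded website list.
for site in website:
    try:
        scrap_company(site)
    except requests.RequestException as exc:
        print(site, 'request failed:', exc)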

