How to use the parse_details method in tempest

Best Python code snippets using tempest_python
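All of the snippets below share the same Scrapy idiom: a parse callback harvests article links from a listing page, then schedules follow-up requests whose callback is a user-defined parse_details method that extracts the fields of interest. Here is a minimal sketch of that pattern; the spider name, start URL, and XPath expressions are hypothetical placeholders, not taken from any of the projects below:

import scrapy


class ExampleSpider(scrapy.Spider):
    # hypothetical spider; name, URL, and XPaths are placeholders
    name = 'example'
    start_urls = ['https://example.com/articles/']

    def parse(self, response):
        # collect article links from the listing page and follow each one
        for href in response.xpath('//article//a/@href').getall():
            yield scrapy.Request(response.urljoin(href), callback=self.parse_details)

    def parse_details(self, response):
        # extract whatever fields the article page exposes
        yield {
            'title': response.xpath('//h1//text()').get(),
            'url': response.url,
        }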

scraper.py

Source: scraper.py (GitHub)



...
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class="col-12 col-md-9 "]//p/a/text()').extract()
        foto = response.xpath('//div[@class = "[ img-fluid ]"]//img//@src').extract()
        foto = foto[2]
        url = response.xpath('//link[@rel = "canonical"]/@href').extract()
        fecha = response.xpath('//span[@class="tout-timestamp"]//time/text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class Inmo(scrapy.Spider):
    name = 'Inmo'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://inmobiliare.com/ultimas-noticias/', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//li[@class = "mvp-blog-story-col left relative infinite-post"]/a/@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1/text()').extract()
        text = response.xpath('//div[@id="mvp-post-feat-text-wrap"]//h1/text()').extract()
        foto = response.xpath('//figure//img//@data-wpfc-original-src').extract()
        foto = foto[0]
        url = response.xpath('//meta[@name = "twitter:url"]/@content').extract()
        fecha = response.xpath('//time//@datetime').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class obras(scrapy.Spider):
    name = 'obras'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://obras.expansion.mx/inmobiliario?utm_source=internal&utm_medium=link-recommended', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//div[@class="Page-pageLead"]//h3//a//@href').extract()
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//div[@class="BlocksPage-mainHead"]/h1/text()').extract()
        text = response.xpath('//article[@class = "first-block"]/p/text()').extract()
        foto = response.xpath('//figure[@class = "ArticleLeadFigure"]/img/@data-src').extract()
        url = response.xpath('//link[@rel = "canonical"]/@href').extract()
        fecha = response.xpath('//article//div[@class = "BlocksPage-datePublished"]//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class lifeRS(scrapy.Spider):
    name = 'lifeRS'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://lifehacker.com/tag/real-estate', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//figure//a//@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class = "r43lxo-0 gqfcxx js_post-content"]//p//text()').extract()
        foto = response.xpath('//div[@class= "sc-1eow4w5-3 lktKQM image-hydration-wrapper"]//img//@data-srcset').extract()
        foto = ' '.join(foto)
        foto = foto.split("w,")
        foto = foto[4]
        foto = foto.split()
        foto = foto[0]
        url = response.xpath('//meta[@name ="twitter:url"]//@content').extract()
        fecha = response.xpath('//time//text()').extract()
        fecha = fecha[0]
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class lifeHO(scrapy.Spider):
    name = 'lifeHO'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://lifehacker.com/tag/home-office', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//figure//a//@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class = "r43lxo-0 gqfcxx js_post-content"]//p//text()').extract()
        foto = response.xpath('//div[@class= "sc-1eow4w5-3 lktKQM image-hydration-wrapper"]//img//@data-srcset').extract()
        foto = ' '.join(foto)
        foto = foto.split("w,")
        foto = foto[4]
        foto = foto.split()
        foto = foto[0]
        url = response.xpath('//meta[@name ="twitter:url"]//@content').extract()
        fecha = response.xpath('//time//text()').extract()
        fecha = fecha[0]
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class MYL(scrapy.Spider):
    name = 'MYL'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('http://realestatemarket.com.mx/mercado-inmobiliario', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//div[@class = "sprocket-mosaic-image-container"]//a//@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h2[@itemprop = "headline"]//text()').extract()
        text = response.xpath('//div[@itemprop = "articleBody"]//text()').extract()
        foto = response.xpath('//img[@itemprop = "image"]//@src').extract()
        foto = ' '.join(foto)
        foto = "http://realestatemarket.com.mx" + foto
        foto = foto.split()
        url = response.xpath('//base//@href').extract()
        fecha = response.xpath('//time//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class MGlobal(scrapy.Spider):
    name = 'MGlobal'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://www.mansionglobal.com/luxury-real-estate-news', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//h3//a//@href').extract()
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        title = title[0]
        text = response.xpath('//div[@id= "mg-article-wrap"]//p//text()').extract()
        foto = response.xpath('//img//@src').extract()
        foto = foto[0]
        foto = foto.split()
        url = response.xpath('//link[@rel = "canonical"]//@href').extract()
        fecha = response.xpath('//time//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

class curbed(scrapy.Spider):
    name = 'curbed'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://www.curbed.com/real-estate/', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//div[@class = "lede-text-wrap has-rubric long"]//a//@href').extract()
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class ="article-content inline"]//p//text()').extract()
        foto = response.xpath('//picture//img//@src').extract()
        foto = foto[0]
        foto = foto.split()
        url = response.xpath('//link[@rel= "canonical"]//@href').extract()
        fecha = response.xpath('//time//span//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')

# main driver
if __name__ == '__main__':
    # run scraper...
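The snippet's main driver is elided after "# run scraper...". One common way to run several spiders like these from a single script is Scrapy's CrawlerProcess; the sketch below is an assumption about how such a driver could look, not the original code:

from scrapy.crawler import CrawlerProcess

# plausible driver for the spiders above; the original body is elided
process = CrawlerProcess(settings={'LOG_LEVEL': 'INFO'})
process.crawl(Inmo)
process.crawl(obras)
process.crawl(lifeRS)
process.crawl(lifeHO)
process.crawl(MYL)
process.crawl(MGlobal)
process.crawl(curbed)
process.start()  # blocks until all spiders finish

Note that each spider's start_requests resets scraper.jsonl, so running them all in one process as sketched would let later spiders clobber earlier output; in practice you would reset the file once, outside the spiders.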


theculturetrip.py

Source: theculturetrip.py (GitHub)



...
    def parse_json(self, response):
        # response.body is raw bytes, so decode the JSON payload before indexing it
        for url in json.loads(response.text)['data']:
            self.scrolled_article.append(url['postID'])
            yield scrapy.Request(url=url['links'], callback=self.parse_details)

    def parse_details(self, response):
        # raw string avoids invalid-escape-sequence warnings in the regex
        jres = re.findall(r'\{\".*\:\{.*\:.*\}', response.body.decode("utf-8"))
        jres = json.dumps(jres[0])
        item = TheculturetripItem()
        item['topic'] = "theculturetrip"
        item['data'] = jres.encode('utf-8')
        yield item
        # with open('test.json', 'w') as outfile:
...
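On Scrapy 2.2 and later, response.json() parses a JSON response body in one step, so the parse_json half of this pattern can be written without touching response.body or response.text directly. A hedged drop-in sketch, assuming the endpoint returns a payload shaped like {'data': [{'postID': ..., 'links': ...}]} as the snippet implies:

    # drop-in replacement for the parse_json above (Scrapy >= 2.2)
    def parse_json(self, response):
        payload = response.json()  # equivalent to json.loads(response.text)
        for entry in payload['data']:
            self.scrolled_article.append(entry['postID'])
            yield scrapy.Request(url=entry['links'], callback=self.parse_details)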


ecab_spider.py

Source: ecab_spider.py (GitHub)



...
        link_list = response.xpath('//article//h1//a//@href').extract()
        self.log(f"responselink_list {link_list}")
        for link in link_list:
            yield scrapy.Request(link, callback=self.parse_details)

    def parse_details(self, response):
        item = {}
        self.log(f"parse_details(): response data url {response.url}")
        info = []
        # scrape title
        title = response.xpath('//*[@class="page-_comment-title"]//@title').extract_first()
        self.log(f"parse_details(): title {title}")
        phone = response.xpath('//*[@class="info-box"]//div[contains(text(), "Phone")]/span/text()').extract_first()
        email = response.xpath('//*[@class="info-box"]//div[contains(text(), "Email")]/span/text()').extract_first()
        url = response.xpath('//*[@class="info-box"]//div[contains(text(), "URL")]//span//a//text()').extract_first()
        owner_name = response.xpath('//*[@class="info-box"]//div[contains(text(), "Owner Name")]/span/text()').extract_first()
        owner_designation = response.xpath('//*[@class="info-box"]//div[contains(text(), "Owner Designation")]/span/text()').extract_first()
        address = response.xpath('//*[@class="info-box"]//div[contains(text(), "Address")]/span/text()').extract_first()
        # scrape info
        data_list = response.xpath('//*[@class="info-box"]//div//text()').extract()
        self.log(f"parse_details(): data {data_list}")
        key_list = []
        value_list = []
        for count, i in enumerate(data_list):
            self.log(f"data count {count}")
            if count % 2 == 0:
                # key = data_list[count]
                # value = data_list[count + 1]
                # item.add(key, value)
                # self.log(f"item key {item[data_list[count]]}")
                key_list.append(data_list[count])
            else:
                value_list.append(data_list[count])
...
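The alternating key/value loop at the end of parse_details can be expressed more compactly with slicing and zip. A minimal sketch of the same pairing, assuming data_list really does alternate label, value:

# labels sit at even indices, their values at the odd indices that follow
key_list = data_list[0::2]
value_list = data_list[1::2]
item = dict(zip(key_list, value_list))  # e.g. {'Phone': '...', 'Email': '...'}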


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, right from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.


YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run tempest automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.


