Best Python code snippet using tempest_python
scraper.py
Source: scraper.py
...
        # (truncated above; the code below relies on `import scrapy` and `import json`)
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class="col-12 col-md-9 "]//p/a/text()').extract()
        foto = response.xpath('//div[@class = "[ img-fluid ]"]//img//@src').extract()
        foto = foto[2]
        url = response.xpath('//link[@rel = "canonical"]/@href').extract()
        fecha = response.xpath('//span[@class="tout-timestamp"]//time/text()').extract()
        # write output file
        # NOTE: indent=2 makes each record span several lines, which breaks the
        # one-record-per-line convention a .jsonl file normally follows
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class Inmo(scrapy.Spider):
    name = 'Inmo'

    def start_requests(self):
        # reset output file (opening in 'w' mode truncates it; the space is a placeholder)
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://inmobiliare.com/ultimas-noticias/', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//li[@class = "mvp-blog-story-col left relative infinite-post"]/a/@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1/text()').extract()
        text = response.xpath('//div[@id="mvp-post-feat-text-wrap"]//h1/text()').extract()
        foto = response.xpath('//figure//img//@data-wpfc-original-src').extract()
        foto = foto[0]
        url = response.xpath('//meta[@name = "twitter:url"]/@content').extract()
        fecha = response.xpath('//time//@datetime').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class obras(scrapy.Spider):
    name = 'obras'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://obras.expansion.mx/inmobiliario?utm_source=internal&utm_medium=link-recommended', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//div[@class="Page-pageLead"]//h3//a//@href').extract()
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//div[@class="BlocksPage-mainHead"]/h1/text()').extract()
        text = response.xpath('//article[@class = "first-block"]/p/text()').extract()
        foto = response.xpath('//figure[@class = "ArticleLeadFigure"]/img/@data-src').extract()
        url = response.xpath('//link[@rel = "canonical"]/@href').extract()
        fecha = response.xpath('//article//div[@class = "BlocksPage-datePublished"]//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class lifeRS(scrapy.Spider):
    name = 'lifeRS'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://lifehacker.com/tag/real-estate', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//figure//a//@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class = "r43lxo-0 gqfcxx js_post-content"]//p//text()').extract()
        foto = response.xpath('//div[@class= "sc-1eow4w5-3 lktKQM image-hydration-wrapper"]//img//@data-srcset').extract()
        # pick a single image URL out of the srcset string (position-dependent and fragile)
        foto = ' '.join(foto)
        foto = foto.split("w,")
        foto = foto[4]
        foto = foto.split()
        foto = foto[0]
        url = response.xpath('//meta[@name ="twitter:url"]//@content').extract()
        fecha = response.xpath('//time//text()').extract()
        fecha = fecha[0]
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class lifeHO(scrapy.Spider):
    name = 'lifeHO'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://lifehacker.com/tag/home-office', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//figure//a//@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class = "r43lxo-0 gqfcxx js_post-content"]//p//text()').extract()
        foto = response.xpath('//div[@class= "sc-1eow4w5-3 lktKQM image-hydration-wrapper"]//img//@data-srcset').extract()
        foto = ' '.join(foto)
        foto = foto.split("w,")
        foto = foto[4]
        foto = foto.split()
        foto = foto[0]
        url = response.xpath('//meta[@name ="twitter:url"]//@content').extract()
        fecha = response.xpath('//time//text()').extract()
        fecha = fecha[0]
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class MYL(scrapy.Spider):
    name = 'MYL'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('http://realestatemarket.com.mx/mercado-inmobiliario', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//div[@class = "sprocket-mosaic-image-container"]//a//@href').extract()
        links_productos = links_productos[:4]
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h2[@itemprop = "headline"]//text()').extract()
        text = response.xpath('//div[@itemprop = "articleBody"]//text()').extract()
        foto = response.xpath('//img[@itemprop = "image"]//@src').extract()
        foto = ' '.join(foto)
        foto = "http://realestatemarket.com.mx" + foto
        foto = foto.split()
        url = response.xpath('//base//@href').extract()
        fecha = response.xpath('//time//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class MGlobal(scrapy.Spider):
    name = 'MGlobal'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://www.mansionglobal.com/luxury-real-estate-news', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//h3//a//@href').extract()
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        title = title[0]
        text = response.xpath('//div[@id= "mg-article-wrap"]//p//text()').extract()
        foto = response.xpath('//img//@src').extract()
        foto = foto[0]
        foto = foto.split()
        url = response.xpath('//link[@rel = "canonical"]//@href').extract()
        fecha = response.xpath('//time//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


class curbed(scrapy.Spider):
    name = 'curbed'

    def start_requests(self):
        # reset output file
        with open('scraper.jsonl', 'w') as f:
            f.write(' ')
        yield scrapy.Request('https://www.curbed.com/real-estate/', callback=self.parse)

    def parse(self, response):
        links_productos = response.xpath('//div[@class = "lede-text-wrap has-rubric long"]//a//@href').extract()
        links_de_la_pagina = []
        for url in links_productos:
            links_de_la_pagina = response.urljoin(url)
            yield scrapy.Request(url=links_de_la_pagina, callback=self.parse_details, dont_filter=True)

    def parse_details(self, response):
        title = response.xpath('//h1//text()').extract()
        text = response.xpath('//div[@class ="article-content inline"]//p//text()').extract()
        foto = response.xpath('//picture//img//@src').extract()
        foto = foto[0]
        foto = foto.split()
        url = response.xpath('//link[@rel= "canonical"]//@href').extract()
        fecha = response.xpath('//time//span//text()').extract()
        # write output file
        with open('scraper.jsonl', 'a') as f:
            f.write(json.dumps({'title': title, 'text': text, 'foto': foto, 'url': url, 'fecha': fecha}, indent=2) + '\n')


# main driver
if __name__ == '__main__':
    #run scraper...
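The main driver is cut off at the end of the snippet. As a minimal sketch of one way to finish it, the spiders could be queued on Scrapy's CrawlerProcess; the settings and spider list below are assumptions, not part of the original file:

# Hypothetical completion of the truncated driver (not from the original source)
from scrapy.crawler import CrawlerProcess

if __name__ == '__main__':
    process = CrawlerProcess(settings={'LOG_LEVEL': 'WARNING'})
    for spider_cls in (Inmo, obras, lifeRS, lifeHO, MYL, MGlobal, curbed):
        process.crawl(spider_cls)   # queue each spider defined above
    process.start()                 # blocks until every queued spider finishes

One caveat with running them all at once: each spider truncates scraper.jsonl in start_requests, so concurrent resets can clobber earlier output; running spiders one at a time with `scrapy crawl <name>` sidesteps that.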
theculturetrip.py
Source: theculturetrip.py
...
    def parse_json(self, response):
        # response.body is bytes; the original indexed it directly ("response.body['data']"),
        # which raises TypeError -- decode the JSON payload first
        for url in json.loads(response.body)['data']:
            self.scrolled_article.append(url['postID'])
            yield scrapy.Request(url=url['links'], callback=self.parse_details)

    def parse_details(self, response):
        # raw string avoids invalid-escape-sequence warnings in the regex
        jres = re.findall(r'\{\".*\:\{.*\:.*\}', response.body.decode("utf-8"))
        jres = json.dumps(jres[0])
        # print('')
        # print(jres)
        # self.i = self.i + 1
        # print('')
        item = TheculturetripItem()
        item['topic'] = "theculturetrip"
        item['data'] = jres.encode('utf-8')
        yield item
        # with open('test.json', 'w') as outfile:
...
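The snippet uses TheculturetripItem without showing its definition. Inferred from the two fields the spider assigns, a minimal items.py declaration would look like this (an assumption, not the project's actual file):

# Hypothetical items.py sketch, inferred from item['topic'] and item['data'] above
import scrapy

class TheculturetripItem(scrapy.Item):
    topic = scrapy.Field()
    data = scrapy.Field()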
ecab_spider.py
Source: ecab_spider.py
...
        link_list = response.xpath('//article//h1//a//@href').extract()
        self.log(f"response link_list {link_list}")
        for link in link_list:
            yield scrapy.Request(link, callback=self.parse_details)

    def parse_details(self, response):
        item = {}
        self.log(f"parse_details(): response data url {response.url}")
        info = []
        # scrape title
        title = response.xpath('//*[@class="page-_comment-title"]//@title').extract_first()
        self.log(f"parse_details(): title {title}")
        phone = response.xpath('//*[@class="info-box"]//div[contains(text(), "Phone")]/span/text()').extract_first()
        email = response.xpath('//*[@class="info-box"]//div[contains(text(), "Email")]/span/text()').extract_first()
        url = response.xpath('//*[@class="info-box"]//div[contains(text(), "URL")]//span//a//text()').extract_first()
        owner_name = response.xpath('//*[@class="info-box"]//div[contains(text(), "Owner Name")]/span/text()').extract_first()
        owner_designation = response.xpath('//*[@class="info-box"]//div[contains(text(), "Owner Designation")]/span/text()').extract_first()
        address = response.xpath('//*[@class="info-box"]//div[contains(text(), "Address")]/span/text()').extract_first()
        # scrape info
        data_list = response.xpath('//*[@class="info-box"]//div//text()').extract()
        self.log(f"parse_details(): data {data_list}")
        key_list = []
        value_list = []
        # data_list alternates entries: even indexes are keys, odd indexes are values
        for count, i in enumerate(data_list):
            self.log(f"data count {count}")
            if count % 2 == 0:
                # key = data_list[count]
                # value = data_list[count + 1]
                # item.add(key, value)
                # self.log(f"item key {item[data_list[count]]}")
                key_list.append(data_list[count])
            else:
                value_list.append(data_list[count])
...
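The enumerate loop above splits data_list into alternating keys and values. Assuming the list really does alternate key, value, the same pairing can be written more idiomatically with slicing and zip; the sample data below is made up for illustration:

# Equivalent to the enumerate loop above, given strictly alternating key/value entries
data_list = ['Phone', '555-0100', 'Email', 'info@example.com']  # hypothetical shape
item = dict(zip(data_list[::2], data_list[1::2]))
print(item)  # {'Phone': '555-0100', 'Email': 'info@example.com'}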
