How to use the find_link method in elementium

Best Python code snippets using elementium_python
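Note that the three snippets below come from projects that define their own find_link helpers (a Wikipedia link-suggestion tool, a Selenium-driven admin test suite, and a baseball-reference scraper) rather than calling elementium directly. For orientation, here is a minimal, hypothetical sketch of what a link lookup could look like through elementium's Selenium wrapper. The find_link(link_text) call is assumed from this page's title rather than taken from elementium's documented API; the CSS-selector fallback uses the general-purpose find() method, and the URL and link text are illustrative.

# Hypothetical sketch only -- not verified against a specific elementium
# release. Assumes SeElements wraps a Selenium WebDriver and that a
# find_link(link_text) helper exists (inferred from this page's title).
from selenium import webdriver
from elementium.drivers.se import SeElements

se = SeElements(webdriver.Firefox())
se.navigate("https://en.wikipedia.org/wiki/Market_town")

# Assumed helper: locate an anchor element by its visible link text.
se.find_link("Market economy").click()

# Fallback: a plain CSS attribute selector through find(), the generic
# lookup in elementium-style wrappers.
se.find("a[title='Market economy']").click()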

test_find_link.py

Source: test_find_link.py (GitHub)


# coding=utf-8
import os
import re
import json
import unittest
import responses
import find_link
import urllib.parse
from unittest.mock import patch


def wiki_url(params):
    default = {
        'action': 'query',
        'formatversion': 2,
        'format': 'json',
    }
    base = 'https://en.wikipedia.org/w/api.php?'
    url = base + urllib.parse.urlencode({**default, **params})
    print(url)
    return url


def one_page(page):
    return json.dumps({'query': {'pages': [page]}})


def json_query(query):
    return json.dumps({'query': query})


class TestFindLink(unittest.TestCase):
    @responses.activate
    def test_get_case_from_content(self):
        title = 'London congestion charge'
        url = wiki_url({
            'prop': 'revisions|info',
            'rvprop': 'content|timestamp',
            'titles': title,
        })
        body = one_page({
            "revisions": [{
                "timestamp": "2015-08-07T15:37:03Z",
                "content": ("The '''London congestion charge''' is a fee "
                            "charged on most motor vehicles operating within "
                            "the Congestion Charge Zone (CCZ)")
            }]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertEqual(find_link.core.get_case_from_content(title), title)

        article = 'MyCar is exempt from the London Congestion Charge, road tax and parking charges.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('London congestion charge', article)
            self.assertEqual(r, 'London congestion charge')

    @responses.activate
    def test_get_wiki_info(self):
        body = one_page({
            'pageid': 312605,
            'ns': 0,
            'title': 'Government budget deficit',
            'touched': '2011-11-24T22:06:21Z',
            'lastrevid': 462258859,
            'counter': '',
            'length': 14071
        })
        url = wiki_url({
            'redirects': '',
            'titles': 'government budget deficit',
            'prop': 'info',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        redirect = find_link.api.get_wiki_info('government budget deficit')
        self.assertIsNone(redirect)

        body = one_page({
            'ns': 0,
            'title': 'Government budget deficits',
            'missing': True
        })
        url = wiki_url({
            'redirects': '',
            'titles': 'government budget deficits',
            'prop': 'info',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertRaises(find_link.api.Missing,
                          find_link.api.get_wiki_info,
                          'government budget deficits')

    @responses.activate
    def test_cat_start(self):
        body = json.dumps({"query": {"allpages": []}})
        url = 'https://en.wikipedia.org/w/api.php'
        responses.add(responses.GET, url, body=body)
        self.assertEqual(find_link.api.cat_start('test123'), [])

    @responses.activate
    def test_all_pages(self):
        title = 'Government budget deficit'
        body = json_query({
            "allpages": [{"pageid": 312605, "ns": 0, "title": title}]
        })
        url = wiki_url({'apfilterredir': 'nonredirects',
                        'apprefix': title,
                        'list': 'allpages',
                        'apnamespace': 0,
                        'aplimit': 500})
        responses.add(responses.GET, url, body=body, match_querystring=True)
        result = find_link.api.all_pages(title)
        self.assertListEqual(result, [])

    @responses.activate
    def test_categorymembers(self):
        body = json_query({"categorymembers": []})
        url = wiki_url({
            'cmnamespace': 0,
            'list': 'categorymembers',
            'cmlimit': 500,
            'cmtitle': 'Test123'
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertListEqual(find_link.core.categorymembers('test123'), [])

    @responses.activate
    def test_is_redirect_to(self):
        title_from = 'Bread maker'
        title_to = 'Bread machine'
        body = one_page({
            'ns': 0,
            'title': 'Bread maker',
            'touched': '2015-06-21T15:12:00Z',
            'length': 27,
            'redirect': True
        })
        url = wiki_url({'titles': 'Bread maker', 'prop': 'info'})
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({'titles': 'Bread maker', 'prop': 'revisions', 'rvprop': 'content'})
        body = one_page({
            'ns': 0,
            'title': 'Bread maker',
            'revisions': [{
                'contentformat': 'text/x-wiki',
                'contentmodel': 'wikitext',
                'content': '#REDIRECT [[Bread machine]]'
            }]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertTrue(find_link.core.is_redirect_to(title_from, title_to))

        title_from = 'Sugarlump'
        title_to = 'Sugar'
        url = wiki_url({'prop': 'info', 'titles': 'Sugarlump'})
        body = one_page({
            'ns': 0,
            'title': 'Sugarlump',
            'missing': True,
            'contentmodel': 'wikitext',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertFalse(find_link.core.is_redirect_to(title_from, title_to))

    @responses.activate
    def test_wiki_redirects(self):
        url = wiki_url({
            'blfilterredir': 'redirects',
            'bllimit': 500,
            'bltitle': 'market town',
            'list': 'backlinks',
            'blnamespace': 0,
        })
        body = json_query({
            'backlinks': [
                {'title': 'Market-town', 'redirect': ''},
                {'title': 'Market towns', 'redirect': ''},
                {'title': 'Marktgemeinde', 'redirect': ''},
                {'title': 'Market right', 'redirect': ''},
                {'title': 'Market towns in England', 'redirect': ''},
                {'title': 'Market rights', 'redirect': ''},
                {'title': 'Market charter', 'redirect': ''},
                {'title': 'Market town privileges', 'redirect': ''}
            ]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        result = find_link.api.wiki_redirects('market town')
        self.assertTrue(all(isinstance(title, str) for title in result))

    def test_en_dash(self):
        title = u'obsessive\u2013compulsive disorder'
        content = 'This is a obsessive-compulsive disorder test'
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')

        content = 'This is a [[obsessive-compulsive]] disorder test'
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')

    @responses.activate
    def test_wiki_search(self):
        url = wiki_url({
            'list': 'search',
            'srlimit': 50,
            'srsearch': '"hedge"',
            'continue': '',
            'srwhat': 'text'
        })
        body = json_query({
            "searchinfo": {"totalhits": 444},
            "search": [{
                "ns": 0,
                "title": "Coaching inn",
                "snippet": "approximately the mid-17th century for a period of about 200 years, the <span class=\"searchmatch\">coaching</span> <span class=\"searchmatch\">inn</span>, sometimes called a coaching house or staging inn, was a vital part of",
                "size": 4918,
                "wordcount": 561,
                "timestamp": "2015-08-04T13:20:24Z"
            }, {
                "ns": 0,
                "title": "Varbuse",
                "snippet": "Estonian Road Museum is located in the former Varbuse <span class=\"searchmatch\">coaching</span> <span class=\"searchmatch\">inn</span>. Varbuse <span class=\"searchmatch\">coaching</span> <span class=\"searchmatch\">inn</span> Estonian Road Museum &quot;Population by place",
                "size": 2350,
                "wordcount": 96,
                "timestamp": "2015-01-02T23:23:10Z"
            }]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'continue': '',
            'srsearch': '"coaching inn"',
            'list': 'search',
            'srlimit': 50,
            'srwhat': 'text',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        totalhits, results = find_link.api.wiki_search('coaching inn')
        self.assertGreater(totalhits, 0)
        totalhits, results = find_link.api.wiki_search('hedge (finance)')
        self.assertGreater(totalhits, 0)

    @responses.activate
    def test_do_search(self):
        url = wiki_url({
            'continue': '',
            'action': 'query',
            'srsearch': '"market town"',
            'srwhat': 'text',
            'format': 'json',
            'list': 'search',
            'srlimit': '50',
        })
        body = '{"query":{"searchinfo":{"totalhits":3593},"search":[{"ns":0,"title":"Market town","snippet":"<span class=\\"searchmatch\\">Market</span> <span class=\\"searchmatch\\">town</span> or market right is a legal term, originating in the medieval period, for a European settlement that has the right to host markets, distinguishing","size":10527,"wordcount":1362,"timestamp":"2015-06-25T18:19:23Z"},{"ns":0,"title":"V\\u011btrn\\u00fd Jen\\u00edkov","snippet":"V\\u011btrn\\u00fd Jen\\u00edkov (Czech pronunciation: [\\u02c8vj\\u025btr\\u0329ni\\u02d0\\u02c8j\\u025b\\u0272i\\u02d0kof]) is a <span class=\\"searchmatch\\">market</span> <span class=\\"searchmatch\\">town</span> in the Jihlava District, Vyso\\u010dina Region of the Czech Republic. About 582","size":833,"wordcount":76,"timestamp":"2013-02-28T19:49:34Z"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'continue': '',
            'blnamespace': '0',
            'bllimit': '500',
            'action': 'query',
            'format': 'json',
            'list': 'backlinks',
            'bltitle': 'market town'
        })
        body = '{"query":{"backlinks":[{"pageid":1038,"ns":0,"title":"Aarhus"},{"pageid":1208,"ns":0,"title":"Alan Turing"},{"pageid":2715,"ns":0,"title":"Abergavenny"},{"pageid":4856,"ns":0,"title":"Borough"},{"pageid":5391,"ns":0,"title":"City"},{"pageid":6916,"ns":0,"title":"Colony"},{"pageid":8166,"ns":0,"title":"Devon"},{"pageid":13616,"ns":0,"title":"Howard Carter"},{"pageid":13861,"ns":0,"title":"Hampshire"},{"pageid":13986,"ns":0,"title":"Hertfordshire"},{"pageid":16143,"ns":0,"title":"John Locke"},{"pageid":16876,"ns":0,"title":"Kingston upon Thames"},{"pageid":19038,"ns":0,"title":"Municipality"},{"pageid":20206,"ns":0,"title":"Manchester"},{"pageid":22309,"ns":0,"title":"Oslo"},{"pageid":22422,"ns":0,"title":"Olney Hymns"},{"pageid":23241,"ns":0,"title":"Telecommunications in China"},{"pageid":25798,"ns":0,"title":"Reykjav\\u00edk"},{"pageid":25897,"ns":0,"title":"Road"},{"pageid":26316,"ns":0,"title":"Racial segregation"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'apnamespace': '14',
            'aplimit': '500',
            'format': 'json',
            'action': 'query',
            'apprefix': 'market town',
            'apfilterredir': 'nonredirects',
            'list': 'allpages'
        })
        body = '{"query":{"allpages":[{"pageid":27601242,"ns":14,"title":"Category:Market towns"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'cmlimit': '500',
            'action': 'query',
            'cmtitle': 'Category:Market town',
            'cmnamespace': '0',
            'format': 'json',
            'list': 'categorymembers'
        })
        body = '{"query":{"categorymembers":[]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'cmlimit': '500',
            'action': 'query',
            'cmtitle': 'Category:Market towns',
            'cmnamespace': '0',
            'format': 'json',
            'list': 'categorymembers'
        })
        body = '{"query":{"categorymembers":[]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'apfilterredir': 'nonredirects',
            'apprefix': 'Market town',
            'list': 'allpages',
            'apnamespace': 0,
            'aplimit': 500,
        })
        body = '{"query":{"allpages":[{"pageid":145965,"ns":0,"title":"Market town"},{"pageid":13941316,"ns":0,"title":"Market towns of Buskerud county"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'prop': 'templates',
            'continue': '',
            'tlnamespace': 10,
            'titles': 'V\u011btrn\u00fd Jen\u00edkov',
            'tllimit': 500,
        })
        body = '{"query":{"pages":[{"pageid":17087711,"ns":0,"title":"V\u011btrn\u00fd Jen\u00edkov","templates":[{"ns":10,"title":"Template:Asbox"},{"ns":10,"title":"Template:Commons"}]}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        reply = find_link.core.do_search('market town', None)
        self.assertIsInstance(reply, dict)
        self.assertSetEqual(set(reply.keys()), {'totalhits', 'results', 'longer'})
        self.assertGreater(reply['totalhits'], 0)
        self.assertIsInstance(reply['results'], list)
        self.assertGreater(len(reply['results']), 0)
        self.assertTrue(any(title.startswith('Market towns of') for title in reply['longer']))

    def test_parse_cite(self):
        bindir = os.path.abspath(os.path.dirname(__file__))
        filename = os.path.join(bindir, 'cite_parse_error')
        sample = open(filename).read()
        found_duty = False
        for a, b in find_link.match.parse_cite(sample):
            if 'duty' in b.lower():
                found_duty = True
        self.assertTrue(found_duty)

    def test_avoid_link_in_cite(self):
        tp = 'magic'
        content = 'test <ref>{{cite web|title=Magic|url=http://magic.com}}</ref>'
        (c, r) = find_link.match.find_link_in_content(tp, content + ' ' + tp)
        self.assertEqual(c, content + ' [[' + tp + ']]')
        self.assertEqual(r, tp)
        self.assertRaises(find_link.match.NoMatch, find_link.match.find_link_in_content, tp, content)

        tp = 'abc'
        content = '==Early life==\n<ref>{{cite news|}}</ref>abc'
        (c, r) = find_link.match.find_link_in_content(tp, content)
        self.assertEqual(c, content.replace(tp, '[[' + tp + ']]'))
        self.assertEqual(r, tp)

    def test_coastal_sage_scrub(self):
        sample = '''Depend on a [[habitat]] that has shown substantial historical or recent declines in size. This criterion infers the population viability of a species based on trends in the habitats upon which it specializes. Coastal [[wetland]]s, particularly in the urbanized [[San Francisco Bay]] and south-coastal areas, alluvial fan [[sage (plant)|sage]] [[scrubland|scrub]] and coastal sage scrub in the southern coastal basins, and arid scrub in the [[San Joaquin Valley]], are examples of California habitats that have seen dramatic reductions in size in recent history. Species that specialize in these habitats generally meet the criteria for Threatened or Endangered status or Special Concern status;'''
        (c, r) = find_link.match.find_link_in_chunk('coastal sage scrub', sample)
        self.assertEqual(c, sample.replace('coastal sage scrub', '[[coastal sage scrub]]'))
        self.assertEqual(r, 'coastal sage scrub')

    def test_section_iter(self):
        result = list(find_link.match.section_iter('test'))
        self.assertListEqual(result, [(None, 'test')])
        text = '''==Heading 1 ==
Paragraph 1.
==Heading 2 ==
Paragraph 2.
'''
        expect = [
            ('==Heading 1 ==\n', 'Paragraph 1.\n'),
            ('==Heading 2 ==\n', 'Paragraph 2.\n')
        ]
        self.assertListEqual(list(find_link.match.section_iter(text)), expect)

    def test_get_subsections(self):
        text = '''==Heading 1 ==
Paragraph 1.
==Heading 2 ==
Paragraph 2.
===Level 2===
Paragraph 3.
==Heading 4==
Paragraph 4.
'''
        self.assertEqual(find_link.match.get_subsections(text, 4), '')

    @responses.activate
    def test_match_found(self):
        url = wiki_url({'prop': 'revisions|info', 'titles': 'payment protection insurance', 'rvprop': 'content|timestamp'})
        content = "{{multiple issues|\n{{Globalize|2=the United Kingdom|date=July 2011}}\n{{Original research|date=April 2009}}\n}}\n'''Payment protection insurance''' ('''PPI'''), also known as '''credit insurance''', '''credit protection insurance''', or '''loan repayment insurance''', is an insurance product that enables consumers to insure repayment of credit if the borrower dies, becomes ill or disabled, loses a job, or faces other circumstances that may prevent them from earning income to service the debt. It is not to be confused with [[income protection insurance]], which is not specific to a debt but covers any income. PPI was widely sold by banks and other credit providers as an add-on to the loan or overdraft product.<ref>{{cite web | url=http://www.fsa.gov.uk/consumerinformation/product_news/insurance/payment_protection_insurance_/what-is-ppi | title=What is payment protection insurance? | accessdate=17 February 2014}}</ref>"
        body = json.dumps({
            "query": {
                "pages": [
                    {
                        "title": "Payment protection insurance",
                        "revisions": [{"timestamp": "2016-03-26T17:56:25Z", "content": content}]
                    }
                ]
            }
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        l = 'payment protection insurance'
        l2 = 'payment Protection Insurance'
        m = re.compile('(P)' + l[1:], re.I).match('P' + l2[1:])
        self.assertEqual(find_link.match.match_found(m, l, None), l)

    def test_avoid_link_in_heading(self):
        tp = 'test phrase'
        content = '''
=== Test phrase ===
This sentence contains the test phrase.'''
        (c, r) = find_link.match.find_link_in_content(tp, content)
        self.assertEqual(c, content.replace(tp, '[[' + tp + ']]'))
        self.assertEqual(r, tp)

    @responses.activate
    @patch('find_link.match.get_case_from_content', lambda s: None)
    def test_find_link_in_content(self):  # this test is slow
        # orig_get_case_from_content = find_link.core.get_case_from_content
        # find_link.core.get_case_from_content = lambda s: None
        self.assertRaises(find_link.match.NoMatch, find_link.match.find_link_in_content, 'foo', 'bar')
        input_content = 'Able to find this test\n\nphrase in an article.'
        self.assertRaises(find_link.match.NoMatch,
                          find_link.match.find_link_in_content,
                          'test phrase', input_content)
        input_content = 'Able to find this test \n \n phrase in an article.'
        self.assertRaises(find_link.match.NoMatch,
                          find_link.match.find_link_in_content,
                          'test phrase', input_content)

        otrain = 'Ticketing on the O-Train works entirely on a proof-of-payment basis; there are no ticket barriers or turnstiles, and the driver does not check fares.'
        (c, r) = find_link.match.find_link_in_content('ticket barriers', otrain, linkto='turnstile')
        self.assertEqual(c, otrain.replace('turnstile', '[[turnstile]]'))
        self.assertEqual(r, 'turnstile')

        sample = """On April 26, 2006, Snoop Dogg and members of his entourage were arrested after being turned away from [[British Airways]]' first class lounge at [[Heathrow Airport]]. Snoop and his party were not allowed to enter the lounge because some of the entourage were flying first class, other members in economy class. After the group was escorted outside, they vandalized a duty-free shop by throwing whiskey bottles. Seven police officers were injured in the midst of the disturbance. After a night in prison, Snoop and the other men were released on bail on April 27, but he was unable to perform at the Premier Foods People's Concert in [[Johannesburg]] on the same day. As part of his bail conditions, he had to return to the police station in May. The group has been banned by British Airways for "the foreseeable future."<ref>{{cite news|url=http://news.bbc.co.uk/1/hi/entertainment/4949430.stm |title=Rapper Snoop Dogg freed on bail |publisher=BBC News |date=April 27, 2006 |accessdate=January 9, 2011}}</ref><ref>{{cite news|url=http://news.bbc.co.uk/1/hi/entertainment/4953538.stm |title=Rap star to leave UK after arrest |publisher=BBC News |date=April 28, 2006 |accessdate=January 9, 2011}}</ref> When Snoop Dogg appeared at a London police station on May 11, he was cautioned for [[affray]] under [[Fear or Provocation of Violence|Section 4]] of the [[Public Order Act 1986|Public Order Act]] for use of threatening words or behavior.<ref>{{cite news|url=http://newsvote.bbc.co.uk/1/hi/entertainment/4761553.stm|title=Rap star is cautioned over brawl |date=May 11, 2006|publisher=BBC News |accessdate=July 30, 2009}}</ref> On May 15, the [[Home Office]] decided that Snoop Dogg should be denied entry to the United Kingdom for the foreseeable future due to the incident at Heathrow as well as his previous convictions in the United States for drugs and firearms offenses.<ref>{{cite web|url=http://soundslam.com/articles/news/news.php?news=060516_snoopb |title=Soundslam News |publisher=Soundslam.com |date=May 16, 2006 |accessdate=January 9, 2011}}</ref><ref>{{cite web|url=http://uk.news.launch.yahoo.com/dyna/article.html?a=/060516/340/gbrj1.html&e=l_news_dm |title=Snoop 'banned from UK' |publisher=Uk.news.launch.yahoo.com |accessdate=January 9, 2011}}</ref> Snoop Dogg's visa card was rejected by local authorities on March 24, 2007 because of the Heathrow incident.<ref>{{cite news |first=VOA News |title=Rapper Snoop Dogg Arrested in UK |date=April 27, 2006 |publisher=Voice of America |url=http://classic-web.archive.org/web/20060603120934/http://voanews.com/english/archive/2006-04/2006-04-27-voa17.cfm |work=VOA News |accessdate=December 31, 2008}}</ref> A concert at London's Wembley Arena on March 27 went ahead with Diddy (with whom he toured Europe) and the rest of the show."""
        (c, r) = find_link.match.find_link_in_content('duty-free shop', sample)
        self.assertEqual(c, sample.replace('duty-free shop', '[[duty-free shop]]'))
        self.assertEqual(r, 'duty-free shop')

        sample = '[[Retriever]]s are typically used when [[waterfowl]] hunting. Since a majority of waterfowl hunting employs the use of small boats'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('waterfowl hunting', sample)
            self.assertEqual(c, sample.replace(']] hunting', ' hunting]]'))
            self.assertEqual(r, 'waterfowl hunting')

        sample = 'abc [[File:Lufschiffhafen Jambol.jpg|thumb|right|Jamboli airship hangar in Bulgaria]] abc'
        q = 'airship hangar'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace(q, '[[' + q + ']]'))
            self.assertEqual(r, q)

        sample = 'It is relatively easy for insiders to capture insider-trading like gains through the use of "open market repurchases." Such transactions are legal and generally encouraged by regulators through safeharbours against insider trading liability.'
        q = 'insider trading'
        q = 'ski mountaineering'  # Germán Cerezo Alonso
        sample = 'started ski mountaineering in 1994 and competed first in the 1997 Catalunyan Championship. He finished fifth in the relay event of the [[2005 European Championship of Ski Mountaineering]].'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace(q, '[[' + q + ']]'))
            self.assertEqual(r, q)

        q = 'fall of the Iron Curtain'
        linkto = 'revolutions of 1989'
        sample = 'With the fall of the [[Iron Curtain]] and the associated'
        # search_for_link = mk_link_matcher(q)
        # m = search_for_link(sample)
        # replacement = match_found(m, q, linkto)
        # self.assertEqual(replacement, 'revolutions of 1989|fall of the Iron Curtain]]')
        (c, r) = find_link.match.find_link_in_chunk(q, sample, linkto=linkto)
        self.assertEqual(c, sample.replace('fall of the [[', '[[revolutions of 1989|fall of the '))
        self.assertEqual(r, 'revolutions of 1989|fall of the Iron Curtain')

        q = 'religious conversion'
        sample = 'There were no reports of [[forced religious conversion]], including of minor U.S. citizens'
        self.assertRaises(find_link.match.LinkReplace, find_link.match.find_link_in_chunk, q, sample)

        q = 'two-factor authentication'
        sample = "Two factor authentication is a 'strong authentication' method as it"
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, "[[Two-factor authentication]] is a 'strong authentication' method as it")
            self.assertEqual(r, q[0].upper() + q[1:])

        q = 'spherical trigonometry'
        sample = 'also presents the spherical trigonometrical formulae'
        (c, r) = find_link.match.find_link_in_content('spherical trig', sample, linkto=q)
        self.assertEqual(c, 'also presents the [[spherical trigonometry|spherical trigonometrical]] formulae')
        self.assertEqual(r, 'spherical trigonometry|spherical trigonometrical')

        q = 'post-World War II baby boom'
        sample = 'huge boost during the post World War II [[Baby Boomer|Baby Boom]].'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, 'huge boost during the [[post-World War II baby boom]].')
            self.assertEqual(r, q)

        q = 'existence of God'
        sample = 'with "je pense donc je suis" or "[[cogito ergo sum]]" or "I think, therefore I am", argued that "the self" is something that we can know exists with [[epistemology|epistemological]] certainty. Descartes argued further that this knowledge could lead to a proof of the certainty of the existence of [[God]], using the [[ontological argument]] that had been formulated first by [[Anselm of Canterbury]].{{Citation needed|date=January 2012}}'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace('existence of [[God', '[[existence of God'))
            self.assertEqual(r, q)

        q = 'virtual machine'
        sample = 'It compiles Python programs into intermediate bytecode, which is executed by the virtual machine. Jython compiles into Java byte code, which can then be executed by every [[Java Virtual Machine]] implementation. This also enables the use of Java class library functions from the Python program.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace('virtual machine', '[[virtual machine]]'))
            self.assertEqual(r, q)

        url = wiki_url({
            'prop': 'info',
            'redirects': '',
            'titles': 'Teleological argument'
        })
        body = json.dumps({
            'query': {
                'pages': [{
                    'pageid': 30731,
                    'ns': 0,
                    'title': 'Teleological argument',
                }],
            }
        })
        q = 'existence of God'
        sample = '[[Intelligent design]] is an [[Teleological argument|argument for the existence of God]],'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            responses.add(responses.GET, url, body=body, match_querystring=True)
            self.assertRaises(find_link.match.LinkReplace, func, q, sample)

        q = 'correlation does not imply causation'
        sample = 'Indeed, an important axiom that social scientists cite, but often forget, is that "[[correlation]] does not imply [[Causality|causation]]."'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, 'Indeed, an important axiom that social scientists cite, but often forget, is that "[[correlation does not imply causation]]."')
            self.assertEqual(r, q)

        sample = "A '''pedestal desk''' is usually a large free-standing [[desk]]"
        self.assertRaises(find_link.match.NoMatch, find_link.match.find_link_in_content, 'standing desk', sample)

        pseudocode1 = 'These languages are typically [[Dynamic typing|dynamically typed]], meaning that variable declarations and other [[Boilerplate_(text)#Boilerplate_code|boilerplate code]] can be omitted.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('boilerplate code', pseudocode1)
            self.assertEqual(c, pseudocode1.replace('Boilerplate_(text)#Boilerplate_code|', ''))
            self.assertEqual(r, 'boilerplate code')

        pseudocode2 = 'Large amounts of [[boilerplate (text)#Boilerplate code|boilerplate]] code.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('boilerplate code', pseudocode2)
            self.assertEqual(c, pseudocode2.replace('(text)#Boilerplate code|boilerplate]] code', 'code]]'))
            self.assertEqual(r, 'boilerplate code')

        sample = 'particularly to handle the peak volumes of work generated by Payment Protection Insurance complaints.'
        (c, r) = find_link.match.find_link_in_content('payment protection insurance', sample)
        self.assertIn('payment protection insurance', c)
        (c, r) = find_link.match.find_link_in_text('payment protection insurance', sample)
        self.assertIn('payment protection insurance', c)

        if False:
            sample = 'further investigations on [[Extrajudicial punishment|extrajudicial killings]] by police forces.'
            q = 'extrajudicial killing'
            (c, r) = find_link.match.find_link_in_content(q, sample)
            self.assertIn(q, c)
            (c, r) = find_link.match.find_link_in_text(q, sample)
            self.assertIn(q, c)

        sample = 'units formed with [[SI prefix|metric prefixes]], such as kiloseconds'
        find_link.match.find_link_in_content('metric prefix', sample)

        sample = u"==Geography==\nA gem of Bermuda's coastline, it is surrounded by [[St. George's Parish, Bermuda|St. George's Parish]] in the north, east, south (Tucker's Town), and [[Hamilton Parish, Bermuda|Hamilton Parish]] in the west. A chain of islands and rocks stretches across the main opening to the [[Atlantic Ocean]], in the east, notably [[Cooper's Island, Bermuda|Cooper's Island]] (which was made a landmass contiguous to St. David's Island and Longbird Island in the 1940s), and [[Nonsuch Island, Bermuda|Nonsuch Island]]. The only channel suitable for large vessels to enter the harbour from the open Atlantic is [[Castle Roads, Bermuda|Castle Roads]], which was historically guarded by a number of fortifications, on [[Castle Island, Bermuda|Castle Island]], Brangman's Island, and Goat Island. Forts were also placed nearby on other small islands, and on the Tucker's Town peninsula of the Main Island. In the west, [[The Causeway, Bermuda|The Causeway]] crosses from the main island to St. David's Island, and beyond this a stretch of water known as [[Ferry Reach, Bermuda|Ferry Reach]] connects the harbour with [[St. George's Harbor, Bermuda|St. George's Harbour]] to the north, where Bermuda's first permanent settlement, [[St. George's, Bermuda|St. George's Town]], was founded in 1612. An unincorporated settlement, [[Tucker's Town, Bermuda|Tucker's Town]], was established on the [[peninsula]] of the [[Main Island, Bermuda|Main Island]] at the south-west of the harbour. The settlement was cleared by compulsory purchase order in the 1920s in order to create a luxury enclave where homes could be purchased by wealthy foreigners, and the attendant Mid Ocean Golf Club. In [[Hamilton Parish, Bermuda|Hamilton Parish]], on the western shore of the harbour, lies [[Walsingham Bay, Bermuda|Walsingham Bay]], the site where, in 1609-10, the crew of the wrecked [[Sea Venture]] built the ''[[Patience]]'', one of two ships built, which carried most of the survivors of the wrecking to [[Jamestown, Virginia|Jamestown]], [[Virginia]], in 1610. The ''Patience'' returned to Bermuda with [[George Somers|Admiral Sir George Somers]], who died in Bermuda later that year."
        find_link.match.find_link_in_content('compulsory purchase order', sample)

        if False:
            yard = "primary [[Hump yard|hump classification yards]] are located in Allentown."
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                (c, r) = func('classification yard', yard)
                self.assertEqual(c, yard.replace('[[Hump yard|hump classification yards]]', 'hump [[classification yard]]s'))
                self.assertEqual(r, 'classification yard')
            yard2 = 'A major [[hump yard|railway classification yard]] is north of Blenheim at [[Spring Creek, New Zealand|Spring Creek]].'
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                (c, r) = func('classification yard', yard2)
                self.assertEqual(c, yard2.replace('[[hump yard|railway classification yard]]', 'railway [[classification yard]]'))
                self.assertEqual(r, 'classification yard')
            yard3 = 'Five houses were destroyed and three others were damaged. A high school was also heavily damaged and railroad cars were thrown in a small freight classification yard. Four people were injured.'
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                (c, r) = func('classification yard', yard3)
                self.assertEqual(c, yard3.replace('classification yard', '[[classification yard]]'))
                self.assertEqual(r, 'classification yard')
            # yard2 = 'For the section from [[Rotterdam]] to the large [[Kijfhoek (classification yard)|classification yard Kijfhoek]] existing track was reconstructed, but three quarters of the line is new, from Kijfhoek to [[Zevenaar]] near the German border.'
            # (c, r) = find_link.match.find_link_in_text('classification yard', yard2)

        if False:
            sample = 'GEHA also has a contract with the federal government to administer benefits for the [[Pre-existing Condition Insurance Plan]], which will be a transitional program until 2014.'
            q = 'pre-existing condition'
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                self.assertRaises(find_link.match.LinkReplace, func, q, sample)

        station = 'Ticket barriers control access to all platforms, although the bridge entrance has no barriers.'
        (c, r) = find_link.match.find_link_in_content('ticket barriers', station, linkto='turnstile')
        self.assertEqual(c, station.replace('Ticket barriers', '[[Turnstile|Ticket barriers]]'))
        self.assertEqual(r, 'Turnstile|Ticket barriers')

        content = [
            'Able to find this test phrase in an article.',
            'Able to find this test phrase in an article.',
            'Able to find this test\n phrase in an article.',
            'Able to find this test \nphrase in an article.',
            'Able to find this test\nphrase in an article.',
            'Able to find this test-phrase in an article.',
            'Able to find this test PHRASE in an article.',
            'Able to find this TEST PHRASE in an article.',
            'Able to find this test\nPhrase in an article.',
            'Able to find this [[test]] phrase in an article.',
            'Able to find this TEST [[PHRASE]] in an article.',
            'Able to find this [[testing|test]] phrase in an article.',
            'Able to find this testphrase in an article.']
        for input_content in content:
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                (c, r) = func('test phrase', input_content)
                self.assertEqual(c, 'Able to find this [[test phrase]] in an article.')
                self.assertEqual(r, 'test phrase')

        q = 'recoil operation'
        article = 'pattern of long-recoil operation as the 25mm and 40mm guns.'
        search_for_link = find_link.match.mk_link_matcher(q)
        self.assertFalse(search_for_link(article))

        q = 'after-dinner speaker'
        linkto = 'public speaking'
        sample = 'in demand as an [[Public speaker|after-dinner speaker]].'
        (c, r) = find_link.match.find_link_in_chunk(q, sample, linkto=linkto)
        self.assertEqual(c, sample.replace('Public speaker', 'public speaking'))
...
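A pattern worth noting in the suite above: every HTTP request that find_link would make to the MediaWiki API is intercepted by the responses library, so the tests run offline and deterministically. Stripped of the find_link specifics, the pattern reduces to the sketch below (the URL and payload are illustrative):

# Minimal, self-contained version of the mocking pattern used above.
# responses intercepts calls made through the requests library.
import json
import requests
import responses

@responses.activate
def test_stubbed_api():
    url = 'https://en.wikipedia.org/w/api.php'
    # Register a canned reply; no real network traffic happens.
    responses.add(responses.GET, url,
                  body=json.dumps({'query': {'pages': []}}))
    reply = requests.get(url).json()
    assert reply == {'query': {'pages': []}}

test_stubbed_api()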


test_admin.py

Source: test_admin.py (GitHub)


...
    def test_can_create_release(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He creates an unpublished release
        self.find_link('Releases').click()
        self.find_link('ADD RELEASE').click()
        self.find_name('title').send_keys('First release')
        self.find_name('date').send_keys(date_format(from_today(1)))
        self.find_name('cover_url').send_keys('http://localhost/cover.jpg')
        self.find_name('player_code').send_keys('<iframe></iframe>')
        self.find_name('description').send_keys('Release description')
        self.find_name('credits').send_keys('Release credits')
        self.find_name('_save').click()
        self.assertIn('First release', self.find_tag('body').text)
        # He verifies that it's not published
        self.get_url('/music')
        self.assertIn('Music', self.browser.title)
        self.assertNotIn('First release', self.find_tag('body').text)
        self.get_url('/music/first-release')
        self.assertNotIn('First release', self.browser.title)
        # He publishes the release
        self.get_url('/admin')
        self.find_link('Releases').click()
        self.find_link('First release').click()
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First release', self.find_tag('body').text)
        # He verifies that it was published
        self.get_url('/music')
        self.find_link('First release').click()
        self.assertIn('First release', self.browser.title)
        # TODO: Test absence/presence of details?


class SongTestCase(AdminTestCase):
    def setUp(self):
        super().setUp()
        PublishedReleaseFactory.create(title='First release',
                                       slug='first-release')

    def test_can_create_song(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published song
        self.find_link('Songs').click()
        self.find_link('ADD SONG').click()
        self.find_name('title').send_keys('First song')
        self.find_name('description').send_keys('Song description')
        self.find_name('player_code').send_keys('<iframe></iframe>')
        self.find_name('credits').send_keys('Song credits')
        self.find_name('lyrics').send_keys('Song lyrics')
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First song', self.find_tag('body').text)
        # He adds an unpublished song
        self.find_link('ADD SONG').click()
        self.find_name('title').send_keys('Second song')
        self.find_name('_save').click()
        self.assertIn('Second song', self.find_tag('body').text)
        # He verifies that only the published song is on the site
        self.get_url('/songs')
        self.assertIn('Songs', self.browser.title)
        self.assertNotIn('Second song', self.find_tag('body').text)
        self.find_link('First song').click()
        self.assertIn('First song', self.browser.title)
        # He adds the songs to the release
        self.get_url('/admin')
        self.find_link('Songs').click()
        self.find_link('First song').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('track').send_keys('1')
        self.find_name('_save').click()
        self.find_link('Second song').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('track').send_keys('2')
        self.find_name('_save').click()
        # He verifies that only the published song is shown on the release
        self.get_url('/music/first-release')
        self.assertIn('First song', self.find_tag('body').text)
        self.assertNotIn('Second song', self.find_tag('body').text)


class VideoTestCase(AdminTestCase):
    # TODO: Duplicated in .test_models.VideoAutofillTestCase
    CASSETTE = 'hth/music/tests/fixtures/cassettes/vimeo.yaml'
    SOURCE_URL = 'https://vimeo.com/126794989'
    PREVIEW_URL = 'http://i.vimeocdn.com/video/517362144_640.jpg'
    EMBED_CODE = ('<iframe src="http://player.vimeo.com/video/126794989"'
                  ' seamless allowfullscreen></iframe>\n')

    def setUp(self):
        super().setUp()
        PublishedReleaseFactory.create(title='First release',
                                       slug='first-release')

    def test_can_create_video(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published video
        self.find_link('Videos').click()
        self.find_link('ADD VIDEO').click()
        self.find_name('title').send_keys('First video')
        self.find_name('source_url').send_keys('http://localhost')
        self.find_name('embed_code').send_keys('<iframe></iframe>')
        self.find_name('preview_url').send_keys('http://localhost/jpg')
        self.find_name('description').send_keys('Video description')
        self.find_name('credits').send_keys('Video credits')
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First video', self.find_tag('body').text)
        # He adds an unpublished video
        self.find_link('ADD VIDEO').click()
        self.find_name('title').send_keys('Second video')
        self.find_name('_save').click()
        self.assertIn('Second video', self.find_tag('body').text)
        # He verifies that only the published video is on the site
        self.get_url('/video')
        self.assertNotIn('Second video', self.find_tag('body').text)
        self.find_link('First video').click()
        self.assertIn('First video', self.browser.title)
        # He adds the videos to the release
        self.get_url('/admin')
        self.find_link('Videos').click()
        self.find_link('First video').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        self.find_link('Second video').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        # He verifies that only the published video is shown on the release
        self.get_url('/music/first-release')
        self.assertIn('First video', self.find_tag('body').text)
        self.assertNotIn('Second video', self.find_tag('body').text)
        self.find_link('First video').click()
        self.assertIn('First video', self.browser.title)

    @vcr.use_cassette(CASSETTE)
    def test_autofill_from_source(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published video, without preview_url and embed_code
        self.find_link('Videos').click()
        self.find_link('ADD VIDEO').click()
        self.find_name('title').send_keys('First video')
        self.find_name('source_url').send_keys(self.SOURCE_URL)
        self.find_name('publish').click()
        self.find_name('_continue').click()
        # He verifies that the preview_url and embed_code have been filled
        self.assertEqual(self.PREVIEW_URL,
                         self.find_name('preview_url').get_attribute('value'))
        self.assertEqual(self.EMBED_CODE.strip(),
                         self.find_name('embed_code').text)
        # He verifies that the published video is on the site
        self.get_url('/video')
        self.find_link('First video').click()
        self.assertIn('First video', self.browser.title)


class PressTestCase(AdminTestCase):
    def setUp(self):
        super().setUp()
        PublishedReleaseFactory.create(title='First release',
                                       slug='first-release')

    def test_can_create_quote(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published quote
        self.find_link('Press').click()
        self.find_link('ADD PRESS').click()
        self.find_name('title').send_keys('First source')
        self.find_name('source_url').send_keys('http://example.com')
        self.find_name('date').send_keys(date_format(from_today(-30)))
        self.find_name('body').send_keys('First quote')
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First source', self.find_tag('body').text)
        # He adds an unpublished quote
        self.find_link('ADD PRESS').click()
        self.find_name('title').send_keys('Second source')
        self.find_name('source_url').send_keys('http://foo.com')
        self.find_name('date').send_keys(date_format(from_today(-30)))
        self.find_name('_save').click()
        self.assertIn('Second source', self.find_tag('body').text)
        # He verifies that only the published quote is on the site
        self.get_url('/press')
        self.assertIn('Press', self.browser.title)
        self.assertIn('First source', self.find_tag('body').text)
        self.assertIn('First quote', self.find_tag('body').text)
        self.assertNotIn('Second source', self.find_tag('body').text)
        self.assertNotIn('Second quote', self.find_tag('body').text)
        # He adds the quotes to the release
        self.get_url('/admin')
        self.find_link('Press').click()
        self.find_link('First source').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        self.find_link('Second source').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        # He verifies that only the published quote is shown on the release
        self.get_url('/music/first-release')
        self.assertIn('First source', self.find_tag('body').text)
        self.assertNotIn('Second source', self.find_tag('body').text)

    def test_can_create_post(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published press post
        self.find_link('Press').click()
        self.find_link('ADD PRESS').click()
        self.find_name('title').send_keys('Post title')
        self.find_name('body').send_keys('Post body')
        self.find_name('date').send_keys(date_format(from_today(-30)))
        self.find_name('quote').click()
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('Post title', self.find_tag('body').text)
        # He verifies that the post is on the site
        self.get_url('/press')
        self.assertIn('Press', self.browser.title)
        self.assertIn('Post title', self.find_tag('body').text)
        self.assertIn('Post body', self.find_tag('body').text)
        # He adds the post to the release
        self.get_url('/admin')
        self.find_link('Press').click()
        self.find_link('Post title').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        # He verifies that post is shown on the release
        self.get_url('/music/first-release')
...
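Here find_link, find_name, find_tag, and find_select are convenience helpers on the project's own AdminTestCase base class, not elementium methods. A plausible minimal implementation over plain Selenium, reconstructed only from how the tests call these helpers (the class below is an assumption, not the project's actual code):

# Assumed reconstruction of the helpers, inferred from the call sites above.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

class SeleniumHelpers:
    browser = None  # a selenium WebDriver, created in setUp elsewhere

    def find_link(self, text):
        # Anchors are looked up by their exact visible link text.
        return self.browser.find_element(By.LINK_TEXT, text)

    def find_name(self, name):
        return self.browser.find_element(By.NAME, name)

    def find_tag(self, tag):
        return self.browser.find_element(By.TAG_NAME, tag)

    def find_select(self, name):
        # Wrap a <select> so tests can call select_by_visible_text().
        return Select(self.find_name(name))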


scrape.py

Source: scrape.py (GitHub)


...
    # nwl_scraper = splits()
    # self.team1 = nwl_scraper.team1()
    # self.team2 = nwl_scraper.team2()

    ''' ----------------------------------- Scraping Functions --------------------------------------- '''

    def find_link(self, url, link_text, second_check=False):
        if second_check == True:
            if self.team1yr == '2022' and self.level.lower() == 'other':
                html = BeautifulSoup(requests.get(url).text, features='lxml')
                year, name, num_teams = self.sewp_info(html)
                num_teams = int(num_teams)
                s = "".join(c for c in html.find_all(text=Comment) if "table_container" in c)
                soup = BeautifulSoup(s, "html.parser")
                team_links = [('https://baseball-reference.com' + a['href']) for a in soup.select('[href*="/register/team.cgi?id="]')][:num_teams]
                return team_links
        elif second_check == False:
            # if self.team1yr != '2022' and self.team2yr != '2022':
            html = BeautifulSoup(requests.get(url).text, features='lxml')
            a_tags = html.find_all('a', href=True)
            return [link['href'] for link in a_tags if link.text == link_text]

    def parse_row(self, row):
        return [str(x.string) for x in row.find_all('td')]

    def sewp_info(self, html):
        spans = html.find_all('span')
        p_tags = html.find_all('p')
        return spans[8].text, spans[9].text, p_tags[1].text.split()[3]

    ''' ------------------------------- Baseball Specific Functions ---------------------------------- '''

    def find_baseball_data(self, url, teamyr=''):
        if self.level.lower() == 'mlb':
            if teamyr != '2022':
                hitting_data = pd.read_html(url)[0].dropna(how='all').fillna(0)
                pitching_data = pd.read_html(url)[1].dropna(how='all').fillna(0)
            elif teamyr == '2022':
                # have to change the index because of the schedule tables in the current season
                hitting_data = pd.read_html(url)[-2].dropna(how='all').fillna(0)
                pitching_data = pd.read_html(url)[-1].dropna(how='all').fillna(0)
            hitting_data = hitting_data[hitting_data['Name'] != 'Name']
            pitching_data = pitching_data[pitching_data['Name'] != 'Name']
            hitting_data['Name'] = hitting_data['Name'].str.replace('*', '', regex=False)
            hitting_data['Name'] = hitting_data['Name'].str.replace('#', '', regex=False)
            pitching_data['Name'] = pitching_data['Name'].str.replace('*', '')
            pitching_data['Name'] = pitching_data['Name'].str.replace('#', '')
        elif self.level.lower() == 'other':
            # find pitching data in the comments
            hitting_data = pd.read_html(url)[0].dropna(how='all').fillna(0)
            hitting_data['Name'] = hitting_data['Name'].str.replace('*', '').str.replace('#', '')
            soup = BeautifulSoup(requests.get(url).text, 'lxml')
            pitching_data = pd.read_html([x for x in soup.find_all(string=lambda text: isinstance(text, Comment)) if 'id="div_team_pitching"' in x][0])[0]
            pitching_data['Name'] = pitching_data['Name'].str.replace('*', '').str.replace('#', '')
        # in the future we should include statcast data and probs if the years are 2015 or later
        return hitting_data, pitching_data, hitting_data.columns, pitching_data.columns

    # sport specific functions
    def baseball(self):
        default_url = 'https://baseball-reference.com'
        def_url_other = 'https://www.baseball-reference.com/register/league.cgi'
        if self.level.lower() == 'other':
            league_link = default_url + self.find_link(def_url_other, self.league)[0]
            if self.team1yr == '2022':
                yr_link1 = self.find_link(league_link, self.team1yr)[2]
                team_links = self.find_link(default_url + yr_link1, self.team1, second_check=True)
                the_link = []
                # try em all
                for link in team_links:
                    html = BeautifulSoup(requests.get(link).text, features='lxml')
                    yr, name, num_teams = self.sewp_info(html)
                    if name == self.team1:
                        the_link.append(link)
                        break
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(the_link[0])
            if self.team2yr == '2022':
                yr_link1 = self.find_link(league_link, self.team1yr)[2]
                team_links = self.find_link(default_url + yr_link1, self.team1, second_check=True)
                the_link = []
                # try em all
                for link in team_links:
                    html = BeautifulSoup(requests.get(link).text, features='lxml')
                    yr, name, num_teams = self.sewp_info(html)
                    if name == self.team2:
                        the_link.append(link)
                        break
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(the_link[0])
            if self.team1yr != '2022':
                yr_link1 = default_url + self.find_link(league_link, self.team1yr)[2]
                team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
            if self.team2yr != '2022':
                yr_link2 = default_url + self.find_link(league_link, self.team2yr)[2]
                team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(team2_link)
            if self.team1yr != '2022' and self.team2yr != '2022':
                yr_link1 = default_url + self.find_link(league_link, self.team1yr)[2]
                yr_link2 = default_url + self.find_link(league_link, self.team2yr)[2]
                team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
                team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(team2_link)
            return hit1, pit1, hit2, pit2, hit_cols, pit_cols

        # mlb branch
        elif self.team1yr != '2022' and self.team2yr != '2022':
            # find year links for each team
            yr_link1 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team1yr)[0]
            yr_link2 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team2yr)[0]
            # find team links for each team
            team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
            team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
            # get probability data from each team link
            hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
            hit2, pit2, hit_cols1, pit_cols1 = self.find_baseball_data(team2_link)
            return hit1, pit1, hit2, pit2, hit_cols, pit_cols

        # 2022 mlb branch since the current season html is different
        # (parentheses added: the original condition mixed "and"/"or" without them)
        elif self.level.lower() == 'mlb' and (self.team1yr == '2022' or self.team2yr == '2022'):
            def_url = 'https://www.baseball-reference.com/leagues/majors/2022.shtml'
            # team1
            if self.team1yr == '2022':
                link1 = default_url + self.find_link(def_url, self.team1)[0]
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(link1, teamyr=self.team1yr)
            elif self.team1yr != '2022':
                # find year links for each team
                yr_link1 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team1yr)[0]
                # find team links for each team
                team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
                # get probability data from each team link
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
            if self.team2yr == '2022':
                link2 = default_url + self.find_link(def_url, self.team2)[0]
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(link2, teamyr=self.team2yr)
            elif self.team2yr != '2022':
                # find year links for each team
                yr_link2 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team2yr)[0]
                # find team links for each team
                team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
                # get probability data from each team link
                hit2, pit2, hit_cols1, pit_cols1 = self.find_baseball_data(team2_link)
...
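In this snippet, find_link is again project-specific: it fetches a page with requests, parses it with BeautifulSoup, and returns the href of every anchor whose visible text matches exactly. The same idea as a standalone function (the example URL and link text are illustrative):

# Standalone version of the anchor-text lookup that scrape.py's
# find_link performs in its default branch.
import requests
from bs4 import BeautifulSoup

def find_link(url, link_text):
    html = BeautifulSoup(requests.get(url).text, features='lxml')
    # Collect hrefs of anchors whose text matches link_text exactly.
    return [a['href'] for a in html.find_all('a', href=True)
            if a.text == link_text]

print(find_link('https://www.baseball-reference.com/leagues/', '2021'))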


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test to following best practices and diving into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with test automation frameworks such as Selenium, Cypress, and TestNG.


You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run elementium automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest now and get 100 minutes of automation testing free!

