How to use _decode_url method in tempest

Best Python code snippet using tempest_python

abstractParser.py

Source:abstractParser.py Github

copy

Full Screen

1# -*- coding: UTF-8 -*-2'''3abstractParser.py4Copyright 2006 Andres Riancho5This file is part of w3af, w3af.sourceforge.net .6w3af is free software; you can redistribute it and/or modify7it under the terms of the GNU General Public License as published by8the Free Software Foundation version 2 of the License.9w3af is distributed in the hope that it will be useful,10but WITHOUT ANY WARRANTY; without even the implied warranty of11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the12GNU General Public License for more details.13You should have received a copy of the GNU General Public License14along with w3af; if not, write to the Free Software15Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA16'''17import core.controllers.outputManager as om18from core.controllers.w3afException import w3afException19from core.data.parsers.encode_decode import htmldecode20from core.data.parsers.urlParser import url_object21import re22import urllib23class abstractParser(object):24 '''25 This class is an abstract document parser.26 27 @author: Andres Riancho ( andres.riancho@gmail.com )28 '''29 def __init__( self, httpResponse ):30 # "setBaseUrl"31 url = httpResponse.getURL()32 redirURL = httpResponse.getRedirURL()33 if redirURL:34 url = redirURL35 36 self._baseUrl = url37 self._baseDomain = url.getDomain()38 self._rootDomain = url.getRootDomain()39 40 # A nice default41 self._encoding = 'utf-8'42 43 # To store results44 self._emails = []45 self._re_URLs = []46 47 def findEmails( self , documentString ):48 '''49 @return: A list with all mail users that are present in the documentString.50 Init,51 >>> from core.data.url.httpResponse import httpResponse as httpResponse52 >>> u = url_object('http://www.w3af.com/')53 >>> response = httpResponse( 200, '', {}, u, u )54 >>> a = abstractParser(response)55 56 First test, no emails.57 >>> a.findEmails( '' )58 []59 60 >>> a = abstractParser(response)61 >>> a.findEmails( ' abc@w3af.com ' )62 ['abc@w3af.com']63 64 
>>> a = abstractParser(response)65 >>> a.findEmails( '<a href="mailto:abc@w3af.com">test</a>' )66 ['abc@w3af.com']67 >>> a = abstractParser(response)68 >>> a.findEmails( '<a href="mailto:abc@w3af.com">abc@w3af.com</a>' )69 ['abc@w3af.com']70 >>> a = abstractParser(response)71 >>> a.findEmails( '<a href="mailto:abc@w3af.com">abc_def@w3af.com</a>' )72 ['abc@w3af.com', 'abc_def@w3af.com']73 >>> a = abstractParser(response)74 >>> a.findEmails( 'header abc@w3af-scanner.com footer' )75 ['abc@w3af-scanner.com']76 77 >>> a = abstractParser(response)78 >>> a.findEmails( 'header abc4def@w3af.com footer' )79 ['abc4def@w3af.com']80 '''81 # First, we decode all chars. I have found some strange sites where they encode the @... some other82 # sites where they encode the email, or add some %20 padding... strange stuff... so better be safe...83 documentString = urllib.unquote_plus( documentString )84 85 # Now we decode the HTML special characters...86 documentString = htmldecode( documentString )87 88 # Perform a fast search for the @. In w3af, if we don't have an @ we don't have an email89 # We don't support mails like myself <at> gmail !dot! com90 if documentString.find('@') != -1:91 documentString = re.sub( '[^\w@\-\\.]', ' ', documentString )92 # NOTE: emailRegex is also used in pks search engine.93 # Now we have a clean documentString; and we can match the mail addresses!94 emailRegex = '([A-Z0-9\._%-]{1,45}@([A-Z0-9\.-]{1,45}\.){1,10}[A-Z]{2,4})'95 for email, domain in re.findall(emailRegex, documentString, re.IGNORECASE):96 if email not in self._emails:97 self._emails.append( email )98 99 return self._emails100 def _regex_url_parse(self, httpResponse):101 '''102 Use regular expressions to find new URLs.103 104 @parameter httpResponse: The http response object that stores the response body and the URL.105 @return: None. 
The findings are stored in self._re_URLs as url_objects106 Init,107 >>> from core.data.url.httpResponse import httpResponse as httpResponse108 >>> u = url_object('http://www.w3af.com/')109 >>> response = httpResponse( 200, '', {}, u, u )110 >>> a = abstractParser(response)111 112 Simple, empty result113 >>> a = abstractParser(response)114 >>> response = httpResponse( 200, '', {}, u, u )115 >>> a._regex_url_parse( response )116 >>> a._re_URLs117 []118 Full URL119 >>> a = abstractParser(response)120 >>> response = httpResponse( 200, 'header http://www.w3af.com/foo/bar/index.html footer', {}, u, u )121 >>> a._regex_url_parse( response )122 >>> a._re_URLs[0].url_string123 'http://www.w3af.com/foo/bar/index.html'124 One relative URL125 >>> a = abstractParser(response)126 >>> response = httpResponse( 200, 'header /foo/bar/index.html footer', {}, u, u )127 >>> a._regex_url_parse( response )128 >>> a._re_URLs[0].url_string129 'http://www.w3af.com/foo/bar/index.html'130 Relative with initial "/" , inside an href131 >>> a = abstractParser(response)132 >>> response = httpResponse( 200, 'header <a href="/foo/bar/index.html">foo</a> footer', {}, u, u )133 >>> a._regex_url_parse( response )134 >>> a._re_URLs[0].url_string135 'http://www.w3af.com/foo/bar/index.html'136 Simple index relative URL137 >>> a = abstractParser(response)138 >>> response = httpResponse( 200, 'header <a href="index">foo</a> footer', {}, u, u )139 >>> a._regex_url_parse( response )140 >>> len( a._re_URLs )141 0142 '''143 #url_regex = '((http|https):[A-Za-z0-9/](([A-Za-z0-9$_.+!*(),;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*(),;/?:@&~=%-]*))?)'144 url_regex = '((http|https)://([a-zA-Z0-9_:@\-\./]*?)/[^ \n\r\t"\'<>]*)'145 146 for url in re.findall(url_regex, httpResponse.getBody() ):147 # This try is here because the _decode_URL method raises an exception148 # whenever it fails to decode a url.149 try:150 decoded_url = self._decode_URL( url_object(url[0]) , self._encoding)151 except 
w3afException:152 pass153 else:154 self._re_URLs.append(decoded_url)155 156 #157 # Now detect some relative URL's ( also using regexs )158 #159 def find_relative( doc ):160 res = []161 162 # TODO: Also matches //foo/bar.txt and http://host.tld/foo/bar.txt163 # I'm removing those matches manually below164 regex = '((:?[/]{1,2}[A-Z0-9a-z%_\-~\.]+)+\.[A-Za-z0-9]{2,4}(((\?)([a-zA-Z0-9]*=\w*)){1}((&)([a-zA-Z0-9]*=\w*))*)?)'165 relative_regex = re.compile( regex )166 167 for match_tuple in relative_regex.findall(doc):168 169 match_string = match_tuple[0]170 171 #172 # And now I filter out some of the common false positives173 #174 if match_string.startswith('//'):175 continue176 177 if match_string.startswith('://'):178 continue179 if re.match('HTTP/\d\.\d', match_string):180 continue181 182 # Matches "PHP/5.2.4-2ubuntu5.7" , "Apache/2.2.8", and "mod_python/3.3.1"183 if re.match('.*?/\d\.\d\.\d', match_string):184 continue185 #186 # Filter finished.187 #188 189 url = httpResponse.getURL().urlJoin( match_string )190 url = self._decode_URL( url , self._encoding)191 res.append( url )192 193 return res194 195 relative_URLs = find_relative( httpResponse.getBody() )196 self._re_URLs.extend( relative_URLs )197 [ i.normalizeURL() for i in self._re_URLs ]198 self._re_URLs = list(set(self._re_URLs)) 199 def getEmails( self, domain=None ):200 '''201 @parameter domain: Indicates what email addresses I want to retrieve: "*@domain".202 @return: A list of email accounts that are inside the document.203 204 >>> from core.data.url.httpResponse import httpResponse as httpResponse205 >>> u = url_object('http://www.w3af.com/')206 >>> response = httpResponse( 200, '', {}, u, u )207 >>> a = abstractParser(response)208 >>> a._emails = ['a@w3af.com', 'foo@not-w3af.com']209 210 >>> a.getEmails()211 ['a@w3af.com', 'foo@not-w3af.com']212 >>> a.getEmails( domain='w3af.com')213 ['a@w3af.com']214 >>> a.getEmails( domain='not-w3af.com')215 ['foo@not-w3af.com']216 217 '''218 if domain:219 return [ i 
for i in self._emails if domain == i.split('@')[1] ]220 else:221 return self._emails222 223 def getForms( self ):224 '''225 @return: A list of forms.226 ''' 227 raise Exception('You should create your own parser class and implement the getForms() method.')228 229 def getReferences( self ):230 '''231 Searches for references on a page. w3af searches references in every html tag, including:232 - a233 - forms234 - images235 - frames236 - etc.237 238 @return: Two sets, one with the parsed URLs, and one with the URLs that came out of a239 regular expression. The second list if less trustworthy.240 '''241 raise Exception('You should create your own parser class and implement the getReferences() method.')242 243 def getComments( self ):244 '''245 @return: A list of comments.246 ''' 247 raise Exception('You should create your own parser class and implement the getComments() method.')248 249 def getScripts( self ):250 '''251 @return: A list of scripts (like javascript).252 ''' 253 raise Exception('You should create your own parser class and implement the getScripts() method.')254 255 def getMetaRedir( self ):256 '''257 @return: Returns list of meta redirections.258 '''259 raise Exception('You should create your own parser class and implement the getMetaRedir() method.')260 261 def getMetaTags( self ):262 '''263 @return: Returns list of all meta tags.264 '''265 raise Exception('You should create your own parser class and implement the getMetaTags() method.')266 267 def _decode_URL(self, url_object_to_decode, encoding):268 '''269 This is one of the most important methods, because it will decode any URL270 and return an utf-8 encoded string. In other words, this methods does c14n (Canonicalization)271 (http://en.wikipedia.org/wiki/Canonicalization) and allows all layers of w3af to simply ignore the272 encoding of the HTTP body (if that's what they want).273 274 This method is very related to httpResponse._charset_handling(), which decodes the HTTP275 body of the response. 
The "problem" is that the body of the response is decoded as expected,276 but URLs aren't... why? Let's see an example:277 278 - HTTP Body: <a href="http://host.tld/%05%44">Click m\x05\x44!</a>279 - HTTP response header indicated encoding: xyz280 - After running _charset_handling() and supposing that "\x05\x44" decodes to "é" in xyz,281 the response is: <a href="http://host.tld/%05%44">Click mé!</a>282 283 As you may have noticed, the %05%44 (which in URL means "\x05\x44") wasn't decoded284 (as expected because the decoding method doesn't handle URL encoding AND xyz encoding at the285 same time!).286 287 So, when we use _decode_URL() we take as input "http://host.tld/%05%44", we decode the288 URL encoding to get "http://host.tld/\x05\x44" and finally we decode that with the xyz encoding289 to get "http://host.tld/é".290 Something small to remember:291 >>> urllib.unquote('ind%c3%a9x.html').decode('utf-8').encode('utf-8') == 'ind\xc3\xa9x.html'292 True293 294 Init,295 >>> from core.data.url.httpResponse import httpResponse as httpResponse296 >>> u = url_object('http://www.w3af.com/')297 >>> response = httpResponse( 200, '', {}, u, u )298 >>> a = abstractParser(response)299 Simple, no strange encoding300 >>> u = url_object('http://www.w3af.com/index.html')301 >>> print a._decode_URL( u , a._encoding ).url_string302 http://www.w3af.com/index.html303 Encoded304 >>> u = url_object('http://www.w3af.com/ind%c3%a9x.html')305 >>> print a._decode_URL( u , a._encoding ).url_string306 http://www.w3af.com/indéx.html307 Wrong parameter308 >>> print a._decode_URL( 'http://www.w3af.com/' , a._encoding )309 Traceback (most recent call last):310 File "<stdin>", line 1, in ?311 ValueError: The "url_object_to_decode" parameter @ _decode_URL of an abstractParser must be of urlParser.url_object type.312 '''313 if not isinstance(url_object_to_decode, url_object):314 msg = 'The "url_object_to_decode" parameter @ _decode_URL of an abstractParser'315 msg += ' must be of urlParser.url_object 
type.'316 raise ValueError( msg )317 318 # Avoid the double decoding performed by httpResponse._charset_handling() and319 # by this function in the cases like this link:320 #321 # http://host.tld/é.html322 #323 # Which is written without URL encoding.324 url_string = url_object_to_decode.url_string325 if urllib.unquote(url_string) == url_string:326 return url_object_to_decode327 328 try:329 decoded = urllib.unquote(url_string).decode(encoding).encode('utf-8')330 return url_object(decoded)331 except UnicodeDecodeError, ude:332 # This error could have been produced by the buggy choice of encoding333 # done by the user when calling _decode_URL with two parameters, 334 # or "selected by default". So, now we are going to test something different335 if encoding == 'utf-8':336 # Test an encoding that only uses one byte:337 decoded = urllib.unquote(url_string).decode('iso-8859-1').encode('utf-8')338 return url_object(decoded)339 elif encoding != 'utf-8':340 # Sometimes, the web app developers, their editors, or some other component341 # makes a mistake, and they are really encoding it with utf-8 and they say they are342 # doing it with some other encoding; this is why I perform this last test:343 try:344 decoded = urllib.unquote(url_string).decode('utf-8').encode('utf-8')345 return url_object(decoded)346 except UnicodeDecodeError, ude:347 msg = 'Failed to _decode_URL: "' + url_object_to_decode +'" using encoding: "' + encoding + '".'348 om.out.error(msg)...

Full Screen

Full Screen

git-clone

Source:git-clone Github

copy

Full Screen

...14# },15# }16_HOST_REWRITE = {17}18def _decode_url(url):19 '''Determine the key parts of a git clone url.20 >>> pprint(_decode_url('https://github.com/tsukasa-au/micropython.git'))21 {'compound_project': 'tsukasa-au/micropython', 'domain': 'github.com', 'project': 'micropython', 'user': 'tsukasa-au'}22 >>> pprint(_decode_url('https://github.com/makarandtapaswi/BallClustering_ICCV2019.git'))23 {'compound_project': 'makarandtapaswi/BallClustering_ICCV2019', 'domain': 'github.com', 'project': 'BallClustering_ICCV2019', 'user': 'makarandtapaswi'}24 >>> pprint(_decode_url('git@github.com:tsukasa-au/micropython.git'))25 {'compound_project': 'tsukasa-au/micropython', 'domain': 'github.com', 'project': 'micropython', 'user': 'tsukasa-au'}26 >>> pprint(_decode_url('https://gist.github.com/50b6cca61dd1c3f88f41.git'))27 {'compound_project': '50b6cca61dd1c3f88f41', 'domain': 'gist.github.com', 'project': '50b6cca61dd1c3f88f41', 'user': None}28 >>> pprint(_decode_url('git@gist.github.com:50b6cca61dd1c3f88f41.git'))29 {'compound_project': '50b6cca61dd1c3f88f41', 'domain': 'gist.github.com', 'project': '50b6cca61dd1c3f88f41', 'user': None}30 >>> pprint(_decode_url('https://git.code.sf.net/p/mcomix/git'))31 {'compound_project': 'mcomix', 'domain': 'sf.net', 'project': 'mcomix', 'user': None}32 >>> pprint(_decode_url('git://git@github.com:nickyringland/ncss.life.git'))33 {'compound_project': 'nickyringland/ncss.life', 'domain': 'github.com', 'project': 'ncss.life', 'user': 'nickyringland'}34 >>> pprint(_decode_url('git://git.code.sf.net/p/mcomix/git'))35 {'compound_project': 'mcomix', 'domain': 'sf.net', 'project': 'mcomix', 'user': None}36 >>> pprint(_decode_url('http://repo.or.cz/fast-export.git'))37 {'compound_project': 'fast-export', 'domain': 'repo.or.cz', 'project': 'fast-export', 'user': None}38 >>> pprint(_decode_url('git://git.videolan.org/libdvdnav.git'))39 {'compound_project': 'libdvdnav', 'domain': 'videolan.org', 'project': 'libdvdnav', 'user': None}40 >>> 
pprint(_decode_url('https://code.videolan.org/videolan/libdvdread.git'))41 {'compound_project': 'videolan/libdvdread', 'domain': 'videolan.org', 'project': 'libdvdread', 'user': 'videolan'}42 >>> pprint(_decode_url('https://gitlab.freedesktop.org/xorg/app/xrandr.git'))43 {'compound_project': 'xorg/app/xrandr', 'domain': 'gitlab.freedesktop.org', 'project': 'app/xrandr', 'user': 'xorg'}44 >>> pprint(_decode_url('https://chromium.googlesource.com/apps/libapps'))45 {'compound_project': 'chromium/apps/libapps', 'domain': 'googlesource.com', 'project': 'apps/libapps', 'user': 'chromium'}46 >>> pprint(_decode_url('https://git.nomology.id.au/preserve-modules'))47 {'compound_project': 'preserve-modules', 'domain': 'nomology.id.au', 'project': 'preserve-modules', 'user': None}48 >>> pprint(_decode_url('https://code.delx.net.au/webdl'))49 {'compound_project': 'webdl', 'domain': 'delx.net.au', 'project': 'webdl', 'user': None}50 >>> pprint(_decode_url('https://lore.kernel.org/linux-nfs/0'))51 {'compound_project': 'linux-nfs/0', 'domain': 'lore.kernel.org', 'project': '0', 'user': 'linux-nfs'}52 >>> pprint(_decode_url('http://localhost/abc.git'))53 Traceback (most recent call last):54 ...55 KeyError: 'Unknown host'56 >>> 57 '''58 RES = [59 # Sourceforge60 r'(?P<scheme>https?|git)://(?:git\.)?(?:code\.)?(?P<domain>sf\.net)/p/(?P<project>[^/]+)/git',61 # Google Source62 r'(?P<scheme>https?)://(?P<user>[^.]+)[.](?P<domain>googlesource\.com)/(?P<project>.+)',63 # Gist64 r'(?P<scheme>https?)://(?P<domain>gist\.github\.com)/(?P<project>[^.]+).git',65 # Generic case.66 r'(?P<scheme>https?)://(?P<domain>(?:git|code)[.][^/]+\.[^/]+)/(?:(?P<user>[a-zA-Z0-9_-]+)/)?(?P<project>(?:[a-zA-Z0-9_-]+/)?[a-zA-Z0-9_.-]+?)(?:\.git)?$',67 r'(?P<scheme>https?)://(?P<domain>[^/]+\.[^/]+)/(?:(?P<user>[a-zA-Z0-9_-]+)/)?(?P<project>(?:[a-zA-Z0-9_-]+/)?[a-zA-Z0-9_.-]+)\.git',68 
r'(?P<scheme>git://)?(?:[a-zA-Z0-9_-]+@)?(?P<domain>[^.:]+\.[^:]+)[:/](?:(?P<user>[a-zA-Z0-9_-]+)/)?(?P<project>[a-zA-Z0-9_.-]+)\.git',69 # Linux kernel70 r'(?P<scheme>https?)://(?P<domain>(?:.+[.])?kernel[.]org)/(?:(?P<user>[a-zA-Z0-9_-]+)/)?(?P<project>(?:[a-zA-Z0-9_-]+/)?[a-zA-Z0-9_.-]+?)(?:\.git)?$',71 ]72 DOMAIN_PREFIXES_TO_STRIP = {'git', 'code'}73 for regexp in RES:74 m = re.match(regexp, url)75 if not m:76 continue77 d = m.groupdict()78 # Cleanup the domain79 # NOTE: We strip off the subdomains 'git' and 'code', though ensure there80 # is at least some part of the domain name left (there must be at least 181 # dot).82 domain_parts = d['domain'].split('.')83 domain_parts.reverse()84 while len(domain_parts) > 2 and domain_parts[-1] in DOMAIN_PREFIXES_TO_STRIP:85 domain_parts.pop()86 d['domain'] = '.'.join(reversed(domain_parts))87 # Rewrite the short hostname to the full hostname for our directory structure.88 if d['scheme'] in _HOST_REWRITE:89 _rewrite_rules = _HOST_REWRITE[d['scheme']]90 if d['domain'] in _rewrite_rules:91 d['domain'] = _rewrite_rules[d['domain']]92 return {93 'domain': d['domain'],94 'user': d.get('user'),95 'project': d['project'],96 'compound_project': '{}/{}'.format(d['user'], d['project']) if d.get('user') else d['project'],97 }98 raise KeyError('Unknown host')99def _ensure_dir_exists(dirname):100 def _walk_path(dirname):101 parts = dirname.split('/')102 parts.reverse()103 output_parts = [parts.pop()]104 while parts:105 output_parts.append(parts.pop())106 yield '/'.join(output_parts)107 if not os.path.exists(dirname):108 for partial_dirname in _walk_path(dirname):109 if not os.path.exists(partial_dirname):110 os.mkdir(partial_dirname)111def get_parser():112 parser = optparse.OptionParser()113 parser.add_option(114 '--mirror',115 action='store_true', dest='mirror', default=False,116 help='Mirror all refs/branches from the remote to the local')117 return parser118def main():119 opt_parser = get_parser()120 options, args = 
opt_parser.parse_args()121 if len(args) != 1:122 opt_parser.error(f'Must provide exactly 1 argument, the git repo to clone. Got {len(args)}: {args!r}')123 clone_url, = args124 url_parts = _decode_url(clone_url)125 # Ensure that the directory up to the domain exists before we call git.126 base_dir = os.path.join(127 os.environ['HOME'], 'Projects', 'src')128 _ensure_dir_exists(base_dir)129 src_base_dir = os.path.join(base_dir, url_parts['domain'])130 _ensure_dir_exists(src_base_dir)131 # Git will make sure that the final directory exists (in case132 # compound_project has a '/' in it).133 cmd = ['git', 'clone']134 # Check if we need any additional flags to git.135 if options.mirror:136 cmd += ['--mirror']137 # Now that we have (possibly) added our flags, specify the src repo and dest138 # directory....

Full Screen

Full Screen

vencode

Source:vencode Github

copy

Full Screen

...33 b = base64.b64decode(b)34 print("%s => %s" % (s, b.decode("UTF-8")))35def _encode_url(s):36 print("%s => %s" % (s, parse.quote(s)))37def _decode_url(s):38 print("%s => %s" % (s, parse.unquote(s)))39if __name__ == "__main__":40 try:41 opts, args = getopt.getopt(42 sys.argv[1:],43 "e:d:h",44 ["encode=", "decode=", "help"],45 )46 for opt in opts:47 if opt[0] in ("-h", "--help"):48 _help()49 for opt in opts:50 if opt[0] in ("-e", "--encode"):51 if opt[1] == "base64":52 for a in args:53 _encode_base64(a)54 exit(0)55 elif opt[1] == "url":56 for a in args:57 _encode_url(a)58 exit(0)59 elif opt[0] in ("-d", "--decode"):60 if opt[1] == "base64":61 for a in args:62 _decode_base64(a)63 exit(0)64 elif opt[1] == "url":65 for a in args:66 _decode_url(a)67 exit(0)68 _help()69 except getopt.GetoptError as e:70 print(e)71 _help()72 except Exception as e:...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run tempest automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful