Best Webmock_ruby code snippet using HTTP.replay
Source: parser.rb
1require "nokogiri"2# The parser handles the parsing of list of replays (the replays index)3class Parser4 class << self5 # Parse the HTML of a list of replays and return replay data6 # as an array of hashes, each hash being a replay.7 #8 # Following data is available for each replay:9 # * :id => the gosugamers replay id10 # - :sentinel => the sentinel team name11 # - :scourge => the scourge team name12 # - :version => replay dota version13 # - :event => the event that the game was played at14 # - :rating => the gosugamers rating for the replay15 # - :dl_count => the gosugamers download count for the replay16 # - :date => the date the replay was put online in gosugamers17 # - :link => the gosugamers replay link (relative to http://www.gosugamers.net/dota)18 #19 # @param [String] html the html content20 # @return [Array<{Symbol => String}>] replay data21 # @example22 # index = HTTP.download("http://www.gosugamers.net/dota/replays")23 # replays = Parser.parse_replay_list(index)24 # replay = replays[0]25 #26 # puts "Latest replay:"27 # puts "Sentinel team was: #{replay[:sentinel]}"28 # puts "Scourge team was: #{replay[:scourge]}"29 # puts "Replay version: #{replay[:version]}"30 # puts "Replay link: #{replay[:link]}"31 def parse_replay_list(html)32 document = Nokogiri::HTML(normalize_tr(html))33 replays = []34 document.xpath("//tr[@class=\"wide_middle\"]").each do |row|35 replays << parse_replay_tr(row)36 end37 replays38 end39 # Parse the HTML of a list of replays and return the next pages links40 # as an array of strings41 #42 # @param [String] html the html content43 # @return [Array<String>] links url44 # @example45 # index = HTTP.download("http://www.gosugamers.net/dota/replays")46 # pages = Parser.parse_replay_list_pages(index)47 #48 # p pages49 # # => ["replays.php?&start=30", "replays.php?&start=60", "replays.php?&start=90"]50 #51 # @example52 # index = HTTP.download("http://www.gosugamers.net/dota/replays")53 # pages = Parser.parse_replay_list_pages(index)54 # second_page = pages[0]55 #56 # second_page_list = HTTP.download("http://www.gosugamers.net/dota/#{second_page}")57 # replays = Parser.parse_replay_list(second_page_list)58 # replay = replays[0]59 #60 # puts "First replay from second page:"61 # puts "Sentinel team was: #{replay[:sentinel]}"62 # puts "Scourge team was: #{replay[:scourge]}"63 # puts "Replay version: #{replay[:version]}"64 # puts "Replay link: #{replay[:link]}"65 def parse_replay_list_pages(html)66 document = Nokogiri::HTML(html)67 pages = []68 # xpath: get all anchors from a td with a wide_middle class and 800 width69 nodeset = document.xpath("//td[@class=\"wide_middle\" and @width=800]/a")70 values = nodeset.first(5).map { |link| parse_href_start_number(link) }71 step = values[1] - values[0]72 first_link = values[0]73 last_link = values[4]74 (first_link..last_link).step(step) do |value|75 pages << "replays.php?&start=#{value}"76 end77 pages78 end79 private80 def parse_replay_tr(tr)81 innermost = tr.children.children.children82 link = tr.first(2)[1][1]83 replay = { :id => link.split("/")[2],84 :sentinel => innermost[2].text,85 :scourge => innermost[3].text,86 :version => innermost[4].text,87 :event => innermost[5].text,88 :rating => innermost[6].text,89 :dl_count => innermost[7].text,90 :date => innermost[8].text,91 :link => link }92 replay.each_value(&:strip!)93 end94 # Gosugamers html sucks95 # We have to close the tr tags ourselves so Nokogiri can work properly96 def normalize_tr(html)97 html.gsub(/(\d{4}-\d{2}-\d{2})(\s+)?<tr class="wide_middle"/, '\\1</tr><tr 
class="wide_middle"')98 end99 # Get the numerical "start" value from the url100 # Example:101 # link = "replay.php?&start=30"102 # parse_href_start_number(link)103 # # => 30104 def parse_href_start_number(link)105 link["href"].split("=")[1].to_i106 end107 end108end...
Source: crawler.rb
1require_relative "replay_index"2require_relative "parser"3require_relative "http"4# The Crawler class crawls gosugamers replay pages, extracting the replays main information and saving them to an index<br />5# {#crawl} provides a good level of control over the crawling process. provided that you supply a block to it. <br />6class Crawler7 # A ReplayIndex instance, by default8 # @return [ReplayIndex]9 attr_accessor :index10 # Base URL to start crawling.<br />11 # Default is the Gosugamers replays index page12 # @return [String]13 attr_accessor :base_url14 # @param [String] index_index index filename15 def initialize(index_file = "")16 @index = ReplayIndex.new(index_file)17 @base_url = "http://www.gosugamers.net/dota/replays"18 end19 # Crawls {#base_url}, jumping from page to page, recovering replay data and saving them to an index<br />20 # This method is auxiliated by {HTTP}, {Parser} and {ReplayIndex} ({#index})21 #22 # If a block is supplied, crawl yields two values. The first is the current page number being crawled, after23 # it has been crawled, parsed and indexed.<br /> The second is a constant, the total page count. Note that24 # not necessarily +crawl+ will crawl all these pages. If the crawler finds a replay that already exists, it simply stops.25 #26 # @param [Block] blk optional27 # @yield [count, total] current page number and the total page count28 #29 # @example30 # crawler = Crawler.index("index.idx")31 # crawler.crawl32 #33 # @example34 # crawler = Crawler.index("index.idx")35 # crawler.crawl do |count, total|36 # percentage = (count * 100.0) / total37 # puts "Progress: #{percentage} (#{count}/#{total})"38 # end39 #40 # @example41 # crawler = Crawler.index("index.idx")42 # crawler.crawl do |count, total|43 # break if count >= 5 # crawl and index only the first five pages44 # end45 def crawl(&blk)46 first_page = HTTP.download(@base_url)47 pages = Parser.parse_replay_list_pages(first_page)48 pages.reverse!.push(@base_url).reverse!49 if block_given?50 crawl_pages_with_progress(pages, &blk)51 else52 crawl_pages(pages)53 end54 end55 private56 def crawl_pages(pages)57 crawl_pages_with_progress(pages) do58 # do nothing59 end60 end61 def crawl_pages_with_progress(pages)62 i = 063 @continue = true64 pages.each do |page|65 crawl_page(page)66 break unless @continue67 @index.save68 i += 169 yield i, pages.length70 end71 end72 def crawl_page(page)73 content = HTTP.download(join_url(page))74 replays = Parser.parse_replay_list(content)75 replays.each do |replay|76 if index.replay_exist?(replay[:id])77 @continue = false78 else79 index.add_replay(replay)80 end81 end82 end83 # Parser returns relative URL pages, relative to gosugamers.net/dota/84 # We have to join them, but only if the url is not absolute85 def join_url(url)86 if @base_url =~ /gosugamers\.net/ && url !~ /gosugamers\.net/87 "http://www.gosugamers.net/dota/#{url}"88 else89 url90 end91 end92end...
Source: crawler_spec.rb
...
    @crawler.index = @index
  end

  describe "crawling" do
    it "should follow pages" do
      Parser.stub(:parse_replay_list).and_return([])
      Parser.stub(:parse_replay_list_pages).and_return(["page1", "page2"])
      @index.stub(:save)
      pages = []
      HTTP.stub(:download) do |page|
        pages << page
      end
      @crawler.base_url = "url"
      @crawler.crawl
      pages.should include("page1", "page2")
    end

    it "should stop when it sees a replay that already exists" do
      Parser.stub(:parse_replay_list).and_return([{:id => "1"}, {:id => "2"}])
      Parser.stub(:parse_replay_list_pages).and_return(["next_page"])
      @index.stub(:replay_exist?) { true }
      @index.stub(:save)
      HTTP.stub(:download) do |page|
        page.should_not eq("next_page")
      end
      @crawler.crawl
    end
  end

  describe "indexing" do
    before(:each) do
      HTTP.stub(:download)
      Parser.stub(:parse_replay_list).and_return([{ :id => "1", :data => "Test"}])
      Parser.stub(:parse_replay_list_pages).and_return([])
    end

    it "should not do anything if a replay already exists" do
      @index.should_receive(:replay_exist?).with("1").and_return(true)
      @index.stub(:save)
      @crawler.crawl
    end

    it "should add to index" do
      @index.should_receive(:add_replay).with({ :id => "1", :data => "Test" })
      @index.stub(:replay_exist?) { false }
      @index.stub(:save)
      @crawler.crawl
    end

    it "should save the index to filesystem" do
      @index.stub(:add_replay)
      @index.stub(:replay_exist?) { false }
      @index.should_receive(:save)
      @crawler.crawl
    end
  end

  describe "gosugamers" do
    it "should correctly join urls returned from parser" do
      Parser.stub(:parse_replay_list).and_return([])
      Parser.stub(:parse_replay_list_pages).and_return(["replays.php?&start=30"])
      @index.stub(:save)
      pages = []
      HTTP.stub(:download) do |page|
        pages << page
      end
      @crawler.crawl
      pages.should include("http://www.gosugamers.net/dota/replays.php?&start=30")
    end
  end

  describe "calling" do
    before(:each) do
      Parser.stub(:parse_replay_list).and_return([])
      Parser.stub(:parse_replay_list_pages).and_return(["replays.php?&start=30", "replays.php?&start=60"])
      HTTP.stub(:download)
      @index.stub(:save)
    end

    it "should report progress when a block is given" do
      data = []
      @crawler.crawl do |count, total|
        data << count << total
      end
      data.should eq([1, 3, 2, 3, 3, 3])
    end

    it "should stop when break is called in the block" do
      data = []
      @crawler.crawl do |count, total|
        data << count << total
...
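The specs above isolate the crawler by stubbing HTTP.download with RSpec doubles. With WebMock the same isolation can happen one layer lower: the request is intercepted on the wire and HTTP.download runs for real. A sketch, assuming http.rb goes through Net::HTTP or open-uri (both hooked by WebMock); the require path and the canned body are illustrative:

require "webmock/rspec"
require_relative "../http"   # illustrative path to the project's http.rb

describe HTTP do
  it "returns whatever WebMock serves for the replay index" do
    # Intercept the request at the network layer instead of stubbing HTTP.download
    stub_request(:get, "http://www.gosugamers.net/dota/replays").
      to_return(:body => "<html>canned replay index</html>")

    page = HTTP.download("http://www.gosugamers.net/dota/replays")
    page.should eq("<html>canned replay index</html>")
  end
end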