# rss scraper for emol.com
# (c) Duncan Mac-Vicar P.
#
# Downloads the emol.com "all news" page, collects the headline title, link,
# preview text and time of every entry found under #noticias_listado, and
# prints a channel header followed by one entry per headline to standard
# output.
require 'hpricot'
require 'open-uri'
require 'time'

news_url = "http://www.emol.com/noticias/todas/_portada/todas.asp"

# load the emol news page
# open-uri's Kernel#open patch no longer handles URLs on Ruby 3.0+, so use
# URI.open where it is available and fall back to the legacy call otherwise.
page = URI.respond_to?(:open) ? URI.open(news_url) : open(news_url)
doc = Hpricot(page)

# Every query below is scoped to the news listing container, so look it up once.
listing = doc.search('#noticias_listado')

# Title text and target href come from the anchor inside each title span.
titles = []
links = []
listing.search('span.noticias_listado_titulos').each do |el|
  link = el.at('a')
  titles << link.inner_html
  links << link['href']
end

previews = listing.search('span.noticias_preview').map(&:inner_html)
times = listing.search('span.hora_noticias').map(&:inner_html)

# Page date, currently unused:
#date = listing.search('span.verd1negra').at('strong').inner_html

# Channel header.
puts ""
puts ""
puts ""
puts " EMOL.com"
puts " EMOL.com, Edición en línea del periódico."
puts " http://www.emol.com"

# One entry per title. zip pairs the columns by position and pads with nil
# (printed as an empty string) when a column has fewer elements than titles.
titles.zip(times, previews, links).each do |title, time, preview, href|
  puts " "
  puts " #{time} - #{title}"
  puts " #{preview}"
  puts " http://www.emol.com#{href}"
  # TODO: a per-item publication date is not emitted yet; the sample value
  # noted here was "Tue, 03 Jun 2003 09:39:21 GMT"
  # (see http://liftoff.msfc.nasa.gov/2003/06/03.html#item573).
  puts " "
end

# Channel footer.
puts ""
puts ""