#!/usr/bin/ruby -l
#
# Crawls the genealogy site at http://genealogy.math.ndsu.nodak.edu/, starting
# from the person named on the command line, follows every "Advisor" link
# recursively and renders the resulting advisor graph to genealogy.png.
#
# Usage:
#   ./gene.rb "LastName, FirstName" time_between_clicks(s)

require 'rubygems'
require 'mechanize'
require 'graphviz'

if ARGV.length != 2
  puts "Usage:"
  puts "./gene.rb \"LastName, FirstName\" time_between_clicks(s)"
  exit
end

$genealogy = GraphViz.new("G") # graph being built; written out as a PNG
$depth = 0
$visited = []                  # node ids of advisors whose ancestry is fully crawled
$traverse = true               # result of the most recent visited check
$start_year = 0                # degree year of the first person crawled (0 = not set yet)

# Substrings tried, in order, to locate the element holding the degree line.
DEGREE_MARKERS = ['Ph.D', 'Dr', 'Phil', 'Magist', 'B.S'].freeze

# Turns a display name into a graph node id: whitespace becomes '_' and the
# punctuation . , - ; ' ( ) is removed.
#
# The hyphen is escaped on purpose: an unescaped ",-;" inside a character
# class is a range (comma through semicolon) and would also strip digits,
# '/' and ':'.
def node_id(name)
  name.gsub(/[[:space:]]/, '_').gsub(/[.,\-;'()]/, '')
end

# Returns the first four-digit number found in the degree text of +page+,
# or 0 when none of the DEGREE_MARKERS yields any text containing one.
def degree_year(page)
  spans = nil
  DEGREE_MARKERS.each do |marker|
    spans = page.search("//*[contains(.,'#{marker}')]/span/text()")
    break unless spans.length == 0
  end
  # String#[] returns nil on no match, and nil.to_i is 0.
  spans.inner_text[/\d\d\d\d/].to_i
end

# Maps the number of years between the starting person's degree and this
# person's degree to a node color. Anything outside 0..250 (including
# negative differences) is "cyan".
def color_for(year_diff)
  case year_diff
  when 0..50    then "green"
  when 51..100  then "blue"
  when 101..150 then "yellow"
  when 151..200 then "orange"
  when 201..250 then "red"
  else               "cyan"
  end
end

# Adds +descendant+ to the graph, then adds an edge to each advisor listed on
# +page+ and recurses into every advisor that has not been crawled yet.
#
# agent      - the Mechanize agent used to follow links
# page       - the already-fetched page of +descendant+
# descendant - display name of the person +page+ belongs to
#
# Reads ARGV[1] as the number of seconds to sleep after each page fetch.
# Updates the globals $genealogy, $visited, $traverse and $start_year.
def traverse_tree(agent, page, descendant)
  desc_str = node_id(descendant)
  advisor_list = page.search("//*[contains(.,'Advisor')]/a/text()")

  desc_year = degree_year(page)
  # The first person crawled defines the reference year for coloring.
  $start_year = desc_year if $start_year == 0
  year_diff = $start_year - desc_year
  puts year_diff

  node_color = color_for(year_diff)
  puts node_color
  $genealogy.add_node(desc_str, :color => node_color)

  # Recursion base case
  return if advisor_list.length == 0

  advisor_names = advisor_list.map { |advisor| advisor.inner_text }

  # Look for advisors: match page links against the advisor names by text.
  page.links.each do |link|
    advisor_names.each do |advisor_name|
      next unless link.text == advisor_name

      adv_str = node_id(advisor_name)
      # The advisor node initially takes the descendant's color; it is added
      # again with its own color if and when its page is crawled.
      $genealogy.add_node(adv_str, :color => node_color)
      $genealogy.add_edge(desc_str, adv_str)
      puts "#{desc_str} => #{adv_str} "

      # Check before clicking so an already-crawled advisor costs neither an
      # HTTP request nor a sleep.
      $traverse = !$visited.include?(adv_str)
      next unless $traverse

      advisor_page = agent.click(link)
      sleep ARGV[1].to_i

      # Recurse
      traverse_tree(agent, advisor_page, advisor_name)
      $visited << adv_str
    end
  end
end

# Write graph to the file when Ctrl-C is pressed.
# NOTE(review): the handler only writes the file; it does not exit, so the
# crawl keeps running after Ctrl-C. Confirm whether that is intended.
trap("SIGINT") { $genealogy.output(:output => "png", :file => "genealogy.png") }

# Newer Mechanize releases expose the class as top-level Mechanize; use the
# legacy WWW::Mechanize name only when it is actually defined.
mechanize_class = defined?(WWW::Mechanize) ? WWW::Mechanize : Mechanize
agent = mechanize_class.new { |a| a.user_agent_alias = 'Mac Safari' }

agent.get('http://genealogy.math.ndsu.nodak.edu/') do |page|
  # Search for a term and store the result
  search_result = page.form_with(:action => 'quickSearch.php') do |search|
    search.searchTerms = ARGV[0]
  end.submit

  # Follow the result link whose text contains the requested name. When
  # several links match, the last one wins; when none match, the crawl starts
  # from the page the search form was on. The name is compared literally so
  # regex metacharacters in it cannot mis-match or raise.
  start_page = page
  search_result.links.each do |link|
    start_page = agent.click(link) if link.text.to_s.include?(ARGV[0])
  end

  traverse_tree(agent, start_page, ARGV[0])
  $genealogy.output(:output => "png", :file => "genealogy.png")
end