サンプルコード。多段リダイレクトにも対応。


#!/usr/bin/env ruby
# coding: utf-8
$KCODE='u'
 
require 'net/http'
require 'net/https'
require 'uri'
 
Net::HTTP.version_1_2
 
# Follows the chain of HTTP redirects starting at +url+ and collects every
# redirect target, in the order the hops were taken.
#
# Each hop is probed with a HEAD request. (Some sites do not support HEAD;
# for those, swap +head+ for +get+ below.) The walk stops when a response
# carries no Location header, when the next URL is not http/https, when a
# URL repeats (redirect loop), or after +max_redirects+ requests.
#
# This is best-effort: any StandardError raised while parsing or fetching is
# reported on stderr and the hops collected so far are still returned.
#
# @param url [String] the (usually shortened) URL to expand
# @param expand_urls [Array<String>] accumulator; hops are appended to it
# @param max_redirects [Integer] upper bound on the number of requests made
# @return [Array<String>] +expand_urls+, with each redirect target appended
def get_expand_urls(url, expand_urls = [], max_redirects = 10)
  current = url
  visited = [url]
  begin
    max_redirects.times do
      uri = URI.parse(current)
      # Only http/https URLs with a host can be requested.
      break unless uri.is_a?(URI::HTTP) && uri.host

      http = Net::HTTP.new(uri.host, uri.port)
      # Without TLS an https hop would speak plain HTTP to port 443.
      http.use_ssl = true if uri.scheme == 'https'
      # The block form closes this connection before the next hop is
      # followed, so connections do not pile up along the chain.
      res = http.start { |conn| conn.head(uri.request_uri) }

      location = res['Location']
      break unless location

      # Location may be a relative reference; resolve it against the
      # URL that produced it.
      next_url = URI.join(current, location).to_s
      # A repeated URL means the servers are redirecting in a circle.
      break if visited.include?(next_url)

      expand_urls.push(next_url)
      visited.push(next_url)
      current = next_url
    end
  rescue => ex
    warn "#{ex.class}: #{ex.message} (while expanding #{current})"
  end
  expand_urls
end
 
# Sample data: shortened t.co links to expand.
urls = %w[
  http://t.co/m95HuCwIGb
  http://t.co/MbFx2qdZAm
  http://t.co/ZVwLbId8Cq
  http://t.co/IEMrNsyjzu
  http://t.co/aFwRbEd4ro
]

# Print each short link followed by every redirect hop it resolves to,
# with a blank line separating the entries.
urls.each do |short_link|
  hops = get_expand_urls(short_link)
  puts "#{short_link}:"
  hops.each { |hop| puts "-> #{hop}" }
  puts ''
end

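実行結果。最後の URL (http://t.co/aFwRbEd4ro) では、リダイレクト先の https の URL に SSL なしで接続しているためと思われるタイムアウトのバックトレースが出力されているが、そこまでの展開結果は取得できているので気にしない。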


$ uname -mrsv
Linux 2.6.32-5-amd64 #1 SMP Sun May 6 04:00:17 UTC 2012 x86_64
 
$ ruby -v
ruby 1.8.7 (2010-08-16 patchlevel 302) [x86_64-linux]
 
$ ruby ./expand_urls.rb
 
http://t.co/m95HuCwIGb:
-> http://medical.yahoo.co.jp/feature/pm25/
 
http://t.co/MbFx2qdZAm:
-> http://yahoo.jp/lfi_YV
-> http://chizucam.olp.yahoo.co.jp/20137e673a2b6aa505b972544a287f28ee7d
 
http://t.co/ZVwLbId8Cq:
-> http://bit.ly/GHrsbD
-> http://weather.yahoo.co.jp/weather/jp/kafun/23/
 
http://t.co/IEMrNsyjzu:
-> http://htn.to/aFTpx9
-> http://mono.hatena.ne.jp/mono/wtmnJFgWfs#/nilab/wtppE8ESgY
 
["/usr/lib/ruby/1.8/net/protocol.rb:135:in `sysread'", "/usr/lib/ruby/1.8/net/protocol.rb:135:in `rbuf_fill'", "/usr/lib/ruby/1.8/timeout.rb:67:in `timeout'", "/usr/lib/ruby/1.8/timeout.rb:101:in `timeout'", "/usr/lib/ruby/1.8/net/protocol.rb:134:in `rbuf_fill'", "/usr/lib/ruby/1.8/net/protocol.rb:116:in `readuntil'", "/usr/lib/ruby/1.8/net/protocol.rb:126:in `readline'", "/usr/lib/ruby/1.8/net/http.rb:2028:in `read_status_line'", "/usr/lib/ruby/1.8/net/http.rb:2017:in `read_new'", "/usr/lib/ruby/1.8/net/http.rb:1051:in `request'", "/usr/lib/ruby/1.8/net/http.rb:801:in `head'", "./expand_urls.rb:14:in `get_expand_urls'", "/usr/lib/ruby/1.8/net/http.rb:543:in `start'", "/usr/lib/ruby/1.8/net/http.rb:440:in `start'", "./expand_urls.rb:13:in `get_expand_urls'", "./expand_urls.rb:19:in `get_expand_urls'", "/usr/lib/ruby/1.8/net/http.rb:543:in `start'", "/usr/lib/ruby/1.8/net/http.rb:440:in `start'", "./expand_urls.rb:13:in `get_expand_urls'", "./expand_urls.rb:19:in `get_expand_urls'", "/usr/lib/ruby/1.8/net/http.rb:543:in `start'", "/usr/lib/ruby/1.8/net/http.rb:440:in `start'", "./expand_urls.rb:13:in `get_expand_urls'", "./expand_urls.rb:37", "./expand_urls.rb:36:in `each'", "./expand_urls.rb:36"]
http://t.co/aFwRbEd4ro:
-> http://4sq.com/YWHhml
-> https://foursquare.com/nilab/checkin/5179c1f7e4b0d88e2fa324e7?s=8IcatVlzuj-rOktGZp4hCUUSDyg&ref=tw

tags: ruby url

Posted by NI-Lab. (@nilab)