サンプルコード。多段リダイレクトにも対応。
#!/usr/bin/env ruby
# coding: utf-8
$KCODE='u'
require 'net/http'
require 'net/https'
require 'uri'
Net::HTTP.version_1_2
# Turns the value of a Location header into an absolute URL string.
#
# base_uri - URI of the request that produced the redirect.
# location - raw Location header value (String).
#
# An absolute Location is returned unchanged; a relative one is resolved
# against base_uri. A value that cannot be parsed is returned as is, so the
# caller can still record it before the next request fails on it.
def resolve_redirect_location(base_uri, location)
  target = URI.parse(location)
  target.relative? ? base_uri.merge(target).to_s : location
rescue URI::Error
  location
end

# Follows the chain of HTTP redirects starting at +url+ and collects every
# redirect target, in the order they were encountered.
#
# Each hop is probed with a HEAD request on its own short-lived connection.
# For servers that do not support HEAD, the request below can be switched to
# GET (conn.get) instead.
#
# url           - String URL to start from (http or https).
# expand_urls   - Array the discovered URLs are appended to; it is mutated
#                 in place and also returned.
# max_redirects - maximum number of redirects to follow (default 10).
#
# Returns expand_urls. The walk stops when a response carries no Location
# header, when a URL repeats (redirect loop; the repeated URL is still
# recorded once so the loop is visible), or when max_redirects is reached.
# Any StandardError (invalid URL, network failure, timeout) is reported on
# stderr and the URLs collected so far are returned.
def get_expand_urls(url, expand_urls = [], max_redirects = 10)
  current = url
  visited = [current]

  max_redirects.times do
    uri = URI.parse(current)
    # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes.
    unless uri.is_a?(URI::HTTP) && uri.host && !uri.host.empty?
      raise ArgumentError, "not an HTTP(S) URL: #{current.inspect}"
    end

    http = Net::HTTP.new(uri.host, uri.port)
    # Without TLS an https target is spoken to in plain HTTP and times out.
    http.use_ssl = (uri.scheme == 'https')
    res = http.start { |conn| conn.head(uri.request_uri) }

    location = res['Location']
    return expand_urls unless location

    current = resolve_redirect_location(uri, location)
    expand_urls.push(current)
    if visited.include?(current)
      warn "redirect loop detected at #{current} (while expanding #{url})"
      return expand_urls
    end
    visited.push(current)
  end

  warn "stopped after #{max_redirects} redirects; chain for #{url} may be incomplete"
  expand_urls
rescue => ex
  warn "#{ex.class}: #{ex.message} (while expanding #{url})"
  expand_urls
end
# Sample data: shortened t.co links whose redirect chains are printed below.
urls = %w[
  http://t.co/m95HuCwIGb
  http://t.co/MbFx2qdZAm
  http://t.co/ZVwLbId8Cq
  http://t.co/IEMrNsyjzu
  http://t.co/aFwRbEd4ro
]

# For each short link, resolve the chain first, then print the link followed
# by one "-> target" line per hop and a blank separator line.
urls.each do |short_url|
  hops = get_expand_urls(short_url)
  puts "#{short_url}:"
  hops.each do |hop|
    puts "-> #{hop}"
  end
  puts ''
end
実行結果。一部エラー(バックトレース)が出ているけど気にしない。最後の URL のリダイレクト先が https で、そこへの HEAD リクエストの応答待ちがタイムアウトしたものと思われる。
$ uname -mrsv
Linux 2.6.32-5-amd64 #1 SMP Sun May 6 04:00:17 UTC 2012 x86_64
$ ruby -v
ruby 1.8.7 (2010-08-16 patchlevel 302) [x86_64-linux]
$ ruby ./expand_urls.rb
http://t.co/m95HuCwIGb:
-> http://medical.yahoo.co.jp/feature/pm25/
http://t.co/MbFx2qdZAm:
-> http://yahoo.jp/lfi_YV
-> http://chizucam.olp.yahoo.co.jp/20137e673a2b6aa505b972544a287f28ee7d
http://t.co/ZVwLbId8Cq:
-> http://bit.ly/GHrsbD
-> http://weather.yahoo.co.jp/weather/jp/kafun/23/
http://t.co/IEMrNsyjzu:
-> http://htn.to/aFTpx9
-> http://mono.hatena.ne.jp/mono/wtmnJFgWfs#/nilab/wtppE8ESgY
["/usr/lib/ruby/1.8/net/protocol.rb:135:in `sysread'", "/usr/lib/ruby/1.8/net/protocol.rb:135:in `rbuf_fill'", "/usr/lib/ruby/1.8/timeout.rb:67:in `timeout'", "/usr/lib/ruby/1.8/timeout.rb:101:in `timeout'", "/usr/lib/ruby/1.8/net/protocol.rb:134:in `rbuf_fill'", "/usr/lib/ruby/1.8/net/protocol.rb:116:in `readuntil'", "/usr/lib/ruby/1.8/net/protocol.rb:126:in `readline'", "/usr/lib/ruby/1.8/net/http.rb:2028:in `read_status_line'", "/usr/lib/ruby/1.8/net/http.rb:2017:in `read_new'", "/usr/lib/ruby/1.8/net/http.rb:1051:in `request'", "/usr/lib/ruby/1.8/net/http.rb:801:in `head'", "./expand_urls.rb:14:in `get_expand_urls'", "/usr/lib/ruby/1.8/net/http.rb:543:in `start'", "/usr/lib/ruby/1.8/net/http.rb:440:in `start'", "./expand_urls.rb:13:in `get_expand_urls'", "./expand_urls.rb:19:in `get_expand_urls'", "/usr/lib/ruby/1.8/net/http.rb:543:in `start'", "/usr/lib/ruby/1.8/net/http.rb:440:in `start'", "./expand_urls.rb:13:in `get_expand_urls'", "./expand_urls.rb:19:in `get_expand_urls'", "/usr/lib/ruby/1.8/net/http.rb:543:in `start'", "/usr/lib/ruby/1.8/net/http.rb:440:in `start'", "./expand_urls.rb:13:in `get_expand_urls'", "./expand_urls.rb:37", "./expand_urls.rb:36:in `each'", "./expand_urls.rb:36"]
http://t.co/aFwRbEd4ro:
-> http://4sq.com/YWHhml
-> https://foursquare.com/nilab/checkin/5179c1f7e4b0d88e2fa324e7?s=8IcatVlzuj-rOktGZp4hCUUSDyg&ref=tw
tags: ruby url
Posted by NI-Lab. (@nilab)