Docomo 絵文字対応表 作成

Unicode のバイナリも直接コピーできるので「ドコモ絵文字コードの種類と一覧(裏絵文字対応)」をよく参照してたんだけど、よく見たら Shift_JIS の 10 進コードが間違ってるんで、自分で作ってみた。

require 'uri'

require 'rubygems'
require 'mechanize'

# Fetches each DoCoMo pictograph reference page listed in `urls` and prints
# one pipe-delimited table row per pictograph to stdout. Each row holds:
# serial number, image reference, Shift_JIS code (decimal, hex),
# Unicode code point (decimal, hex), the UTF-8 encoded character, and title.
#
# NOTE(review): WWW::Mechanize is the namespace used by this script; newer
# releases of the mechanize gem appear to expose the class as plain
# `Mechanize` -- confirm against the installed gem version.
agent = WWW::Mechanize.new

# docomo page url misspells 'extension' as 'extention'
urls = [
  'http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/basic/',
  'http://www.nttdocomo.co.jp/service/imode/make/content/pictograph/extention/'
]

urls.each do |url|
  page = agent.get(url)
  rows = page.search('//tr[@class="acenter middle"]')

  # The first two matching rows of every page are skipped
  # (presumably table header rows -- verify against the live markup).
  rows.drop(2).each do |row|
    cells = row.css('td')
    img = row.css('td img').first
    src = img && img['src']

    # A row without the expected cells or image cannot be rendered;
    # report it and carry on instead of dying with NoMethodError on nil.
    if cells.length < 6 || src.nil?
      warn "skipping malformed pictograph row on #{url}"
      next
    end

    serial_num = cells[0].text
    sjis_hex = cells[2].text
    sjis_dec = sjis_hex.hex
    unicode_hex = cells[4].text
    unicode_dec = unicode_hex.hex
    # Encode the code point as a UTF-8 string so the glyph itself is printed.
    unicode_bin = [unicode_dec].pack('U*')
    title = cells[5].text

    # Resolve the image src against the page URL so that relative,
    # root-relative and absolute src values all yield a valid URL
    # (plain string concatenation only works for the relative case).
    image_url = URI.join(url, src)

    puts "|#{serial_num}|[#{image_url}:image:w#{img['width']},h#{img['height']}]|#{sjis_dec}|#{sjis_hex}|#{unicode_dec}|#{unicode_hex}|#{unicode_bin}|#{title}|"
  end
end