# -*- coding: utf-8 -*-
#
# Scrapes the Starbucks Japan store locator and writes every store as a
# point of interest to "starbucks_poi.csv" (Shift_JIS encoded).
#
# Steps:
#   1. read the total store count from the search result page,
#   2. walk the paginated result list and collect store IDs,
#   3. fetch each store's detail page (coordinates, name, address, ...),
#   4. write one POI line per store.

# Ruby 1.8 multibyte switch; it has no effect on Ruby 1.9 and later.
$KCODE = 'u'

require 'rubygems'
require 'hpricot'
require 'open-uri'
require 'nkf'
require 'uri'
require 'timeout'

# Number of stores the result list advances by per page (the "storelist"
# query parameter is a 1-based offset stepped by this amount).
STORES_PER_PAGE = 10

# Store IDs at or above this value are reported as "Dropped" and ignored.
# NOTE(review): presumably these are not regular stores - confirm.
STORE_ID_LIMIT = 9000

# Per-request timeout, number of attempts, and pause between attempts used
# by openURI.
FETCH_TIMEOUT_SEC = 30
FETCH_ATTEMPTS = 5
FETCH_RETRY_WAIT_SEC = 5

# A single store as scraped from the store locator.
class Starbucks
  # Ordered [pattern, replacement] pairs applied by #open_eng.
  # Order matters: phrases such as "定休日" and "曜日" must be replaced
  # before the single character "日" that they contain.
  OPEN_AT_TRANSLATIONS = [
    [/\s/, ""],
    ["定休日", "Reg.Holiday"],
    ["不定休", "unfixed"],
    ["曜日", ""],
    ["〜", "-"],
    ["・", ""],
    ["は", ": "],
    ["ドライブスルー", "DriveThru "],
    ["/", "/ "],
    ["月", "Mon."],
    ["火", "Tue."],
    ["水", "Wed."],
    ["木", "Thu."],
    ["金", "Fri."],
    ["土", "Sat."],
    ["日", "Sun."],
    ["祝", ""]
  ].freeze

  attr_accessor :storeId, :name, :address, :lat, :lng, :tel, :open_at

  # id   - numeric store ID
  # name - store name
  # lat  - latitude
  # lng  - longitude
  # addr - address text
  # tel  - telephone number text
  # op   - opening-hours text (Japanese)
  def initialize(id, name, lat, lng, addr, tel, op)
    @storeId = id
    @name = name
    @lat = lat
    @lng = lng
    @address = addr
    @tel = tel
    @open_at = op
  end

  # Returns the opening-hours text with whitespace removed and the known
  # Japanese terms replaced by English abbreviations. @open_at itself is
  # left untouched.
  def open_eng
    OPEN_AT_TRANSLATIONS.inject(@open_at) do |text, (pattern, replacement)|
      text.gsub(pattern, replacement)
    end
  end

  # "id,lng,lat,name,address"
  def to_s
    "%d,%8.5f,%8.5f,%s,%s" % [@storeId, @lng, @lat, @name, @address]
  end

  # One POI CSV line: longitude, latitude, a fixed "Starbucks" label and
  # the translated opening hours.
  def to_poi
    "%8.5f,%8.5f,\"Starbucks\", \"%s\"" % [@lng, @lat, open_eng]
  end
end

# Fetches +uri+, converts the body to UTF-8 and parses it with Hpricot.
#
# Each attempt is limited to FETCH_TIMEOUT_SEC seconds. On Timeout::Error
# the request is retried after FETCH_RETRY_WAIT_SEC seconds, up to
# FETCH_ATTEMPTS attempts in total; the last Timeout::Error is re-raised.
# Any other exception propagates immediately.
def openURI(uri)
  retries = FETCH_ATTEMPTS
  begin
    # Timeout.timeout replaces the bare Kernel#timeout, which newer Rubies
    # no longer provide.
    Timeout.timeout(FETCH_TIMEOUT_SEC) do
      # Block form so the underlying handle is closed after reading.
      body = open(uri) { |io| io.read }
      Hpricot.parse(NKF.nkf('-w', body))
    end
  rescue Timeout::Error
    retries -= 1
    raise unless retries > 0
    sleep FETCH_RETRY_WAIT_SEC
    retry
  end
end

# Removes "?" characters and replaces newlines with "/".
# Returns a new string; the argument is not modified.
def strip(str)
  str.gsub("?", "").gsub("\n", "/")
end

# Converts a Japanese geodetic system (Tokyo datum) coordinate to the
# world geodetic system using a linear approximation.
#
# ln - longitude in degrees
# la - latitude in degrees
#
# Returns [longitude, latitude] in degrees.
def conv(ln, la)
  lng = ln - la * 0.000046038 - ln * 0.000083043 + 0.010040
  lat = la - la * 0.00010695 + ln * 0.000017464 + 0.0046017
  [lng, lat]
end

# Returns the cleaned text of the detail-table cell that follows the row
# header whose text contains +label+.
# Raises NoMethodError if the page has no such header.
def detail_field(doc, label)
  strip(doc.at("th/[text()*='#{label}']").parent.next_sibling.to_plain_text)
end

# --- Get the number of stores -------------------------------------------
doc = openURI("http://www.starbucks.co.jp/search/result_store.php")
result = (doc/"span.S").inner_text
n_of_starbucks = result[/^(\d+)/, 1].to_i
# Number of result pages. Ceiling division, so an exact multiple of the
# page size does not request an extra page past the end of the list.
n_of_pages = (n_of_starbucks + STORES_PER_PAGE - 1) / STORES_PER_PAGE
puts "Number of Starbucks : #{n_of_starbucks}"

# --- Collect store IDs ----------------------------------------------------
storeIds = []
n_of_pages.times do |n|
  puts "Processing page #{n + 1}"
  uri = "http://www.starbucks.co.jp/search/result_store.php?SearchString=&DriveThrowgh=&Terrace=&HoleBean=&TakeOut=&storelist=#{n * STORES_PER_PAGE + 1}"
  doc = openURI(uri)
  (doc/:a).each do |a|
    # Anchors without an href, or without a storeId parameter, are skipped.
    id_text = a[:href].to_s[/storeId=(\d+)/, 1]
    next unless id_text

    id = id_text.to_i
    if id < STORE_ID_LIMIT
      storeIds << id
    else
      puts "Dropped #{id}"
    end
  end
end
# A store linked more than once must only be fetched and written once.
storeIds.uniq!

puts ""
puts "Real number of Starbucks : #{storeIds.size}"

# --- Fetch the details of every store -------------------------------------
stores = []
storeIds.each do |id|
  uri = "http://www.starbucks.co.jp/search/map/result.php?storeId=#{id}&lang=ja"
  doc = openURI(uri)

  html = doc.to_original_html
  # reqX / reqY hold longitude / latitude in arc-seconds; a value missing
  # from the page becomes 0.0.
  lng = html[/reqX\s*=\s*(\d+\.\d+)/, 1].to_f
  lat = html[/reqY\s*=\s*(\d+\.\d+)/, 1].to_f
  lng /= 3600.0 # seconds -> degrees
  lat /= 3600.0
  lng, lat = conv(lng, lat)

  name = detail_field(doc, '店舗名')
  addr = detail_field(doc, '住所')
  tel = detail_field(doc, '電話番号')
  op = detail_field(doc, '営業時間')

  s = Starbucks.new(id, name, lat, lng, addr, tel, op)
  stores << s
  puts s
end

# --- Write the POI file (Shift_JIS) ---------------------------------------
File.open("starbucks_poi.csv", 'w') do |f|
  stores.each do |s|
    f.puts NKF.nkf('-s', s.to_poi)
  end
end