#!/usr/bin/ruby
#
# Scrapes the memo index page at `url` and writes an RSS 2.0 feed of its
# entries to shm.rss in the current working directory.
#
# Usage:
#   (no arguments)  fetch the live page
#   -d              debug mode: read ./index.html instead of the live page and
#                   print progress information to stdout

require 'nokogiri'
require 'open-uri'
require 'rss'
require 'time' # Time.parse; previously only available via rss's own requires

debug = ARGV[0] == '-d'

url = 'http://www.st.ryukoku.ac.jp/~kjm/security/memo/'
url = './index.html' if debug

puts "opening #{url}" if debug

# Kernel#open no longer handles URLs on Ruby 3.0+, so go through URI.open.
# It fetches http(s) URLs and falls back to a normal file open for strings
# that do not look like a URL (the debug path). The block form closes the
# handle as soon as the document has been parsed.
doc = URI.open(url) { |io| Nokogiri::HTML(io) }

# FIXME: any better ways there?
# Turn site-absolute hrefs ("/~kjm/...") into full URLs so they remain valid
# once they are copied out of the page and into the feed.
doc.css('a[href^="/~kjm/"]').each do |anc|
  anc['href'] = 'http://www.st.ryukoku.ac.jp' + anc['href']
  puts "prefixed: #{anc['href']}" if debug
end

# Which version should we use?
rss = RSS::Maker.make('2.0') do |xml|
  xml.channel.title = doc.title
  xml.channel.link = url
  xml.channel.description = doc.css('div.NORMAL').first.children
  p xml.channel if debug

  doc.css('a.NU').each do |link|
    # Anchors directly under an <h2> are not turned into feed items.
    next if link.parent.name == 'h2'

    puts "processing: #{link}" if debug

    i = xml.items.new_item
    # Sibling layout after the anchor: a"》", span" ", content
    i.title = link.next.next.content
    i.link = link['href']
    i.description =
      case link.parent.name
      when 'p'  then link.parent.parent.children # Normal short items
      when 'h3' then link.parent.next.next       # "various", "tuiki" etc
      else 'Something wrong'
      end

    # The href fragment is expected to carry an 8-digit date ("#YYYYMMDD",
    # presumably -- confirm against the page). Leave the date unset instead of
    # crashing the whole run when a link has no such fragment.
    stamp = /#([0-9]{8})/.match(link['href'])
    i.date = Time.parse(stamp[1]) if stamp

    next unless debug

    puts " #{link.parent.name}: Title: #{i.title}"
    puts " Link: #{i.link}"
    puts " Date: #{i.date}"
    puts ''
    # description is too long to put here
  end

  # TTL depends on your cron settings
  xml.channel.ttl = '60' # (in minutes)
end

# Lazy: should check before writing
File.write('shm.rss', rss.to_s)