2009年2月8日星期日

帮人搬家之导入图片至Picasa

Blog搬家,文字部分其实很好处理,麻烦的是图片。如果BSP下手狠一点直接删掉,而且本地又没有备份的话,那就全完了。

歪酷到目前为止相册都还没动,不知道是没检查到还是怎样,总之现在还能抓就是了。

因为Ruby没有好用的HTMLParser类,所以以下代码用的是JavaScript(准确地说,是在Windows Script Host下运行的JScript)。脚本会为每张图片输出一条wget命令,把输出重定向到批处理文件再执行即可完成下载。由于我不确定歪酷服务器上的图片文件名是否唯一,所以直接将URL处理后作为文件名保存。

/**
 * Scans an HTML fragment for <img> elements hosted on foto.ycstatic.com and
 * prints one wget command per image through WScript.echo.
 *
 * The output file name is the URL-encoded image URL. Every '%' in the
 * command is doubled because the printed lines are meant to be saved into a
 * batch file, where a single '%' starts a variable reference.
 *
 * Runs under Windows Script Host (JScript), so only ES3 syntax is used.
 *
 * @param {string} src HTML markup to scan.
 */
function parseHTML(src) {
  var doc = new ActiveXObject('htmlfile');
  doc.write(src);

  var images = doc.getElementsByTagName('img');
  for (var i = 0; i < images.length; i++) {
    var url = images[i].src;
    if (url.indexOf('foto.ycstatic.com') !== -1) {
      // The URL is quoted so that characters such as '&' in a query string
      // are not interpreted by the command processor.
      var command = 'wget -O ' + encodeURIComponent(url) + ' "' + url + '"';
      // A string pattern would only replace the first '%'; the encoded file
      // name contains many, so a global regex is required.
      command = command.replace(/%/g, '%%');

      WScript.echo(command);
    }
  }
}

/**
 * Loads an exported feed file and hands the HTML of every entry summary to
 * parseHTML.
 *
 * @param {string} path Path of the XML file to read.
 */
function parseXML(path) {
  var xml = new ActiveXObject('MSXML2.DOMDocument.3.0');
  // MSXML loads asynchronously unless told otherwise, in which case load()
  // can return before the document is available for querying.
  xml.async = false;
  if (!xml.load(path)) {
    // Unreadable or malformed file: nothing to extract. Nothing is printed
    // because this script's output is meant to be a batch file.
    return;
  }

  var summaries = xml.getElementsByTagName('feed/entry/summary');
  for (var i = 0; i < summaries.length; i++) {
    var content = summaries[i].firstChild;
    // An empty <summary/> has no child node to read.
    if (content) {
      parseHTML(content.nodeValue);
    }
  }
}


/**
 * Runs parseXML on every file in the current directory whose path ends in
 * '.xml' (case-sensitive).
 */
function main() {
  var fileSystem = new ActiveXObject('Scripting.FileSystemObject');
  var files = new Enumerator(fileSystem.GetFolder('.').Files);

  while (!files.atEnd()) {
    var path = String(files.item());
    // Compare the last four characters against the extension.
    if (path.slice(-4) == '.xml') {
      parseXML(path);
    }
    files.moveNext();
  }
}

// Script entry point: start the scan as soon as the file is executed.
main();

上传至Picasa的代码倒没什么可解释的,只需要留意一点:每个Picasa相册最多只能容纳500张图片。下面的脚本只会上传到一个固定的相册,并不会自动处理这个限制,图片超过500张时需要手工分批上传到不同的相册。

require 'fileutils'
require 'net/https'
require 'rexml/document'
require 'uri'

# Logs in through Google's ClientLogin endpoint for the Picasa Web Albums
# service ('lh2') and returns the authorization line of the response.
#
# NOTE(review): Google has reportedly retired ClientLogin; confirm the
# endpoint is still reachable before relying on this method.
#
# @param email [String] account e-mail address
# @param passwd [String] account password
# @return [String] the "Auth=..." line of the response body, ready to be
#   placed after "GoogleLogin " in an Authorization header
# @raise [RuntimeError] if a successful response carries no "Auth=" line
# @raise [Net::HTTPExceptions] (via Net::HTTPResponse#error!) if the server
#   answers with a non-2xx status
def getAuth(email, passwd)
  uri = URI.parse('https://www.google.com/accounts/ClientLogin')

  req = Net::HTTP::Post.new(uri.path)
  req.set_form_data('Email' => email, 'Passwd' => passwd, 'service' => 'lh2')

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  res = http.start { |h| h.request(req) }

  case res
  when Net::HTTPSuccess
    # The body is a list of KEY=value lines; chomp also drops a trailing CR
    # so the token can be used in a header verbatim.
    token = res.body.to_s.each_line.map(&:chomp).find { |line| line.start_with?('Auth=') }
    # Without this check the caller would receive something that is not a
    # token and send a malformed Authorization header later on.
    raise 'ClientLogin response did not contain an Auth= line' unless token

    token
  else
    warn res.body
    res.error!
  end
end

# Escapes the characters that are not allowed verbatim in XML text content.
def picasa_xml_escape(text)
  text.to_s.gsub(/[&<>]/, '&' => '&amp;', '<' => '&lt;', '>' => '&gt;')
end

# Builds the multipart/related request body: an Atom entry describing the
# photo followed by the raw image bytes. The result is a binary string.
def picasa_multipart_body(title, summary, image_data, ftype)
  metadata = [
    'Media multipart posting',
    '--END_OF_PART',
    'Content-Type: application/atom+xml',
    '',
    "<entry xmlns='http://www.w3.org/2005/Atom'>",
    "  <title>#{picasa_xml_escape(title)}</title>",
    "  <summary>#{picasa_xml_escape(summary)}</summary>",
    '  <category scheme="http://schemas.google.com/g/2005#kind"',
    '    term="http://schemas.google.com/photos/2007#photo"/>',
    '</entry>',
    '--END_OF_PART',
    "Content-Type: #{ftype}",
    '',
    ''
  ].join("\n")

  # Text and image are joined as bytes; mixing a non-ASCII UTF-8 title with
  # binary image data would otherwise raise Encoding::CompatibilityError.
  metadata.b << image_data << "\n--END_OF_PART--\n"
end

# Uploads one image file to a Picasa Web Albums album.
#
# NOTE(review): the Picasa Web Albums data API has reportedly been shut
# down; confirm the endpoint still exists before relying on this method.
#
# @param auth [String] authorization line as returned by getAuth ("Auth=...")
# @param albumid [String] ID of the target album
# @param title [String] photo title (XML-escaped before sending)
# @param summary [String] photo description (XML-escaped before sending)
# @param filename [String] path of the image file to upload
# @param ftype [String] MIME type of the image
# @yield [src] the value of the src attribute of the created entry's
#   <content> element
# @return the block's result, or the src string when no block is given
# @raise [RuntimeError] if a successful response has no entry/content element
# @raise [Net::HTTPExceptions] (via Net::HTTPResponse#error!) on non-2xx
def ul2Picasa(auth, albumid, title, summary, filename, ftype = 'image/jpeg')
  uri = URI.parse("http://picasaweb.google.com/data/feed/api/user/default/albumid/#{albumid}")

  body = picasa_multipart_body(title, summary, File.binread(filename), ftype)

  req = Net::HTTP::Post.new(uri.path)
  req.body = body
  req.set_content_type('multipart/related; boundary="END_OF_PART"')
  req['MIME-version'] = '1.0'
  req['Authorization'] = "GoogleLogin #{auth}"
  # Content-Length counts bytes, not characters.
  req['Content-Length'] = body.bytesize.to_s

  http = Net::HTTP.new(uri.host, uri.port)
  res = http.start { |h| h.request(req) }

  case res
  when Net::HTTPSuccess
    content = REXML::Document.new(res.body).get_elements('entry/content')[0]
    raise 'Upload response did not contain an entry/content element' unless content

    src = content.attributes.get_attribute('src').to_s
    block_given? ? yield(src) : src
  else
    puts res.body
    res.error!
  end
end

email = 'USERNAME@gmail.com'
passwd = 'USERPASSWORD'

# The album ID can be obtained by creating the album manually and looking at
# the page source.
albumid = 'NNNNNNNNNNNNNNNNNN'

# Files are moved into this directory once they have been uploaded.
mvdir = '@up'
# The directory must exist before the first move: otherwise FileUtils.move
# renames the uploaded file to "@up" and each later move overwrites it.
FileUtils.mkdir_p(mvdir)

auth = getAuth(email, passwd)

# Keep a record of every uploaded file and its URL for later use.
# The log is opened in append mode because uploaded files are moved away, so
# a re-run after a failure only sees the remaining files and must not wipe
# the earlier records. The block form closes the file even if an upload raises.
# NOTE: everything goes into the single album above; nothing here enforces
# the 500-photos-per-album limit.
File.open('piclog.txt', 'a') do |flog|
  # The list is collected up front because files are moved out of the
  # directory while it is being processed.
  Dir.glob('*.jpg').each do |fn|
    ul2Picasa(auth, albumid, fn, '', fn) do |url|
      puts fn
      # Insert an "/s800" path segment before the file name of the URL.
      flog.puts "#{fn}\t#{url.sub(%r{(/[^/]+)$}, '/s800\1')}"
      # Flush so the record survives a crash in a later upload.
      flog.flush
      FileUtils.move(fn, mvdir)
    end
  end
end

没有评论 :