download_picasa_album.py

   1 #!/usr/bin/env python
   2 # -*- encoding:utf-8 -*-
   3
   4 """
   5
   6 Pour télécharger un album Picasa sans avoir à utiliser l'application Picasa
   7
   8 Il faut lui fournir l'url vers le flux RSS
   9
  10 """
  11
  12 import urllib
  13 import lxml.etree
  14 import os
  15
  16 def get_album(url):
  17     """Récupère le xml décrivant l'album à partir de l'``url`` du flux RSS"""
  18     page = urllib.urlopen(url)
  19     content = page.read()
  20     parsed = lxml.etree.fromstring(content)
  21     return parsed
  22
  23 def album_to_links(album):
  24     """Un peu de xpath pour récupérer les liens directs des éléments
  25        de l'``album``."""
  26     medias = album.xpath("//media:group", namespaces=album.nsmap)
  27     processed = []
  28     for media in medias:
  29         contenturls = media.xpath("./media:thumbnail", namespaces=media.nsmap)
  30         print [i.items() for i in contenturls]
  31         contenturl = contenturls[-1]
  32         contenturl = [val for (kw, val) in contenturl.items() if kw == "url"][0]
  33         # On récupère la meilleure résolution de l'image
  34         contenturl = contenturl.replace("s288", "s5000")
  35         title = media.xpath("./media:title", namespaces=media.nsmap)[0]
  36         title = title.text
  37         processed.append([title, contenturl])
  38     return processed
  39
  40 def download_file(url, path):
  41     """Télécharge le fichier depuis l'``url`` et l'enregistre dans ``path``."""
  42     page = urllib.urlopen(url)
  43     with open(path, "w") as f:
  44         bloc = page.read(4096)
  45         while bloc != "":
  46             f.write(bloc)
  47             bloc = page.read(4096)
  48
  49 def do_all(rss_url, folder):
  50     """Télécharge tout l'album depuis ``rss_url`` et le stocke dans ``folder``"""
  51     album = get_album(rss_url)
  52     titles = album.xpath("//image/title", namespaces=album.nsmap)
  53     albumtitle = titles[0].text
  54     os.mkdir("%s/%s" % (folder, albumtitle))
  55     linktitles = album_to_links(album)
  56     for [title, url] in linktitles:
  57         print "Downloading %s : %s" % (title, url)
  58         download_file(url, "%s/%s/%s" % (folder, albumtitle, title))
  59     return albumtitle
  60
  61 if __name__ == "__main__":
  62     import sys
  63     rss_url = sys.argv[1]
  64     albumtitle = do_all(rss_url, ".")
  65     print (u"Téléchargé dans %s" % albumtitle).encode("utf-8")