]> gitweb.pimeys.fr Git - dtc.git/blob - dtc.py
Init : dtc. Pour récupérer les quotes, les stocker dans une BDD, les afficher…
[dtc.git] / dtc.py
1 #!/usr/bin/env python
2 # -*- coding:utf-8 -*-
3 import sys
4
5 import html2text
6 import nltk
7 import re
8 import feedparser
9 import psycopg2
10 import psycopg2.extras
11 import json
# Debug switch: when true, insert() prints the id of each quote before
# storing it.
VERBOSE = False
13
def getcursor():
    """Open a connection to the ``dtc`` database.

    Returns a ``(connection, cursor)`` pair. The cursor is created with
    psycopg2's ``DictCursor`` factory, so fetched rows can be indexed by
    column name.
    """
    connection = psycopg2.connect(database="dtc")
    dict_cursor = connection.cursor(
        cursor_factory=psycopg2.extras.DictCursor)
    return connection, dict_cursor
19
20
def insert(quote):
    """Insert one quote into the ``quotes`` table.

    ``quote`` is a mapping providing the keys ``id``, ``date`` and
    ``quote``; they are bound to the named placeholders of the INSERT
    statement.

    A fresh connection is obtained from getcursor() on every call. The
    transaction is committed through the connection object and the
    connection is always closed afterwards, including when the INSERT
    raises, so repeated calls do not accumulate open connections.
    """
    if VERBOSE:
        print("INSERTing %s" % quote["id"])
    con, cur = getcursor()
    try:
        cur.execute(
            "INSERT INTO quotes (id, date, quote) VALUES (%(id)s, %(date)s, %(quote)s);",
            quote)
        # Commit via the driver rather than sending a raw "COMMIT;" statement.
        con.commit()
    finally:
        # Closing without a commit discards the pending transaction.
        con.close()
29
def last_inserted():
    """Return the highest quote id stored in the ``quotes`` table.

    The value comes from ``SELECT MAX(id)``; on an empty table the
    aggregate is NULL, which the driver hands back as ``None``.

    The connection opened for the query is closed before returning.
    """
    con, cur = getcursor()
    try:
        cur.execute("SELECT MAX(id) AS maxid FROM quotes;")
        return cur.fetchone()["maxid"]
    finally:
        con.close()
35
def fetch_rss():
    """Download the RSS feed and convert its entries into quote dicts.

    Each returned dict has the keys ``id`` (the entry title parsed as an
    integer), ``date`` (the entry's ``published`` field, unmodified) and
    ``quote`` (the entry summary passed through format()).

    Entries whose title is not an integer are reported on standard output
    and skipped.
    """
    feed = feedparser.parse('http://feeds.feedburner.com/bashfr-quotes')
    collected = []
    for entry in feed["entries"]:
        try:
            number = int(entry["title"])
        except ValueError:
            print("FAILED : " + entry["title"])
            continue
        collected.append({
            "id": number,
            "date": entry["published"],
            "quote": format(entry["summary_detail"]["value"]),
        })
    return collected
50
51
def format(quote):
    """Turn the HTML of a quote into plain text.

    The markup is removed with ``nltk.clean_html``, HTML entities are then
    decoded with ``html2text.unescape``, and a trailing " Votez !" is cut
    from the end of the result.
    """
    without_tags = nltk.clean_html(quote)
    text = html2text.unescape(without_tags)
    # Unwanted " Votez !" suffix at the very end of the text
    # (presumably the feed's voting link label -- verify against the feed).
    return re.sub(" Votez !$", "", text)
58
def get_quotes(first, last=None):
    """Fetch quotes from the database by id range.

    Parameters:
        first: lowest id to return (inclusive).
        last: highest id to return (inclusive); ``None`` means no upper
            bound.

    Returns a list of plain dicts, one per row, ordered by id. A truthy
    ``date`` value is replaced by its ``YYYY-MM-DD HH:MM:SS`` string form
    (which also makes the rows JSON-serializable); falsy dates are left
    as they are.

    The connection opened for the query is closed before returning.
    """
    req = "SELECT * FROM quotes WHERE id >= %(first)s"
    if last is not None:
        req += " AND id <= %(last)s"
    # Without ORDER BY the database may return the rows in any order.
    req += " ORDER BY id;"
    con, cur = getcursor()
    try:
        cur.execute(req, {"first": first, "last": last})
        rows = [dict(row) for row in cur.fetchall()]
    finally:
        con.close()
    for row in rows:
        if row["date"]:
            # Spelled-out equivalent of "%F %T", which not every platform's
            # strftime understands.
            row["date"] = row["date"].strftime("%Y-%m-%d %H:%M:%S")
    return rows
73
def display(liste):
    """Print quotes to standard output.

    ``liste`` is an iterable of mappings with the keys ``id``, ``date``
    and ``quote``. Each quote is rendered as an ``id (date)`` header line
    followed by its text; consecutive quotes are separated by a line of
    80 underscores.
    """
    separator = "\n" + "_" * 80 + "\n"
    rendered = ["%(id)s (%(date)s)\n%(quote)s" % q for q in liste]
    print(separator.join(rendered))
78
def _parse_range(args):
    """Convert positional command-line arguments into a ``(first, last)`` id range.

    ``args[0]`` is the first id. ``args[1]``, when present, is either the
    last id or one of ``"+"`` / ``"_"`` meaning "no upper bound" (``last``
    is then ``None``). With a single argument the range covers that id only.

    Raises ValueError when an id is not an integer.
    """
    first = int(args[0])
    if len(args) < 2:
        return first, first
    if args[1] in ("+", "_"):
        return first, None
    try:
        return first, int(args[1])
    except ValueError:
        raise ValueError("Le deuxième paramètre doit être un entier ou '+'")


if __name__ == "__main__":
    # Options such as --json may appear anywhere on the command line; only
    # the remaining arguments are positional.
    args = [arg for arg in sys.argv[1:] if not arg.startswith("--")]
    if not args:
        print("Usage : dtc.py (fetch|update|pull) | <id> [<id2>|+] [--json]")
        sys.exit(1)
    if args[0] in ("fetch", "update", "pull"):
        fetched = fetch_rss()
        print(fetched)
        newest = last_inserted()
        for q in fetched:
            # newest is None when nothing is stored yet: every fetched
            # quote is new in that case.
            if newest is None or q["id"] > newest:
                insert(q)
    else:
        try:
            first, last = _parse_range(args)
        except ValueError as exc:
            # Stop here instead of querying with an undefined range.
            print(exc)
            sys.exit(1)
        quotes = get_quotes(first=first, last=last)
        if "--json" in sys.argv:
            print(json.dumps(quotes))
        else:
            display(quotes)