X-Git-Url: http://gitweb.pimeys.fr/?a=blobdiff_plain;f=dtc.py;h=162e735ca64a56fc4110eaeb063e6cf8d32b0c72;hb=38b89ae326e2f735696c9701e0efbc1967422144;hp=826b2c0204f5599294e02aaa8748ef3479987925;hpb=ed41d64a149a9de8e576788e9a1586395ccfa2b7;p=dtc.git diff --git a/dtc.py b/dtc.py index 826b2c0..162e735 100755 --- a/dtc.py +++ b/dtc.py @@ -2,13 +2,12 @@ # -*- coding:utf-8 -*- import sys -import html2text -import nltk -import re -import feedparser import psycopg2 import psycopg2.extras import json + +import fetch_dtc + VERBOSE = False def getcursor(): @@ -33,29 +32,6 @@ def last_inserted(): cur.execute("SELECT MAX(id) AS maxid FROM quotes;") return cur.fetchone()["maxid"] -def fetch_rss(): - """Récupère le flux RSS et le formate""" - flux = feedparser.parse('http://feeds.feedburner.com/bashfr-quotes') - quotes = [] - for q in flux["entries"]: - try: - id = int(q["title"]) - except ValueError: - print "FAILED : " + q["title"] - continue - date = q["published"] - quote = format(q["summary_detail"]["value"]) - quotes.append({"id" : id, "date" : date, "quote" : quote}) - return quotes - - -def format(quote): - """Dé-HTML-ise la quote""" - raw = html2text.unescape(nltk.clean_html(quote)) - # Fucking garbage - raw = re.sub(" Votez !$", "", raw) - return raw - def get_quotes(first, last=None): """Récupère des quotes dans la base.""" add = "" @@ -79,12 +55,14 @@ def display(liste): if __name__ == "__main__": import sys if sys.argv[1] in ["fetch", "update", "pull"]: - l = fetch_rss() - print l + l = fetch_dtc.getquotes() last = last_inserted() for q in l: if q["id"] > last: - insert(q) + try: + insert(q) + except psycopg2.IntegrityError as err: + print "Bim %s\nl,last_inserted = %s, %s" % (err, l, last_inserted) else: id = int(sys.argv[1]) try: