X-Git-Url: http://gitweb.pimeys.fr/?p=dtc.git;a=blobdiff_plain;f=dtc.py;h=d9cd465c5c5ff2dd41af5e46c23c810671b4af4a;hp=826b2c0204f5599294e02aaa8748ef3479987925;hb=56c8287c00eab4c3b6673502d5b964a43d70ccb7;hpb=ed41d64a149a9de8e576788e9a1586395ccfa2b7 diff --git a/dtc.py b/dtc.py index 826b2c0..d9cd465 100755 --- a/dtc.py +++ b/dtc.py @@ -2,13 +2,12 @@ # -*- coding:utf-8 -*- import sys -import html2text -import nltk -import re -import feedparser import psycopg2 import psycopg2.extras import json + +import fetch_dtc + VERBOSE = False def getcursor(): @@ -33,29 +32,6 @@ def last_inserted(): cur.execute("SELECT MAX(id) AS maxid FROM quotes;") return cur.fetchone()["maxid"] -def fetch_rss(): - """Récupère le flux RSS et le formate""" - flux = feedparser.parse('http://feeds.feedburner.com/bashfr-quotes') - quotes = [] - for q in flux["entries"]: - try: - id = int(q["title"]) - except ValueError: - print "FAILED : " + q["title"] - continue - date = q["published"] - quote = format(q["summary_detail"]["value"]) - quotes.append({"id" : id, "date" : date, "quote" : quote}) - return quotes - - -def format(quote): - """Dé-HTML-ise la quote""" - raw = html2text.unescape(nltk.clean_html(quote)) - # Fucking garbage - raw = re.sub(" Votez !$", "", raw) - return raw - def get_quotes(first, last=None): """Récupère des quotes dans la base.""" add = "" @@ -79,8 +55,7 @@ def display(liste): if __name__ == "__main__": import sys if sys.argv[1] in ["fetch", "update", "pull"]: - l = fetch_rss() - print l + l = fetch_dtc.getquotes() last = last_inserted() for q in l: if q["id"] > last: