# -*- coding:utf-8 -*-
import sys
-import html2text
-import nltk
-import re
-import feedparser
import psycopg2
import psycopg2.extras
import json
+
+import fetch_dtc
+
VERBOSE = False
def getcursor():
cur.execute("SELECT MAX(id) AS maxid FROM quotes;")
return cur.fetchone()["maxid"]
-def fetch_rss():
- """Récupère le flux RSS et le formate"""
- flux = feedparser.parse('http://feeds.feedburner.com/bashfr-quotes')
- quotes = []
- for q in flux["entries"]:
- try:
- id = int(q["title"])
- except ValueError:
- print "FAILED : " + q["title"]
- continue
- date = q["published"]
- quote = format(q["summary_detail"]["value"])
- quotes.append({"id" : id, "date" : date, "quote" : quote})
- return quotes
-
-
-def format(quote):
- """Dé-HTML-ise la quote"""
- raw = html2text.unescape(nltk.clean_html(quote))
- # Fucking garbage
- raw = re.sub(" Votez !$", "", raw)
- return raw
-
def get_quotes(first, last=None):
"""Récupère des quotes dans la base."""
add = ""
if __name__ == "__main__":
import sys
if sys.argv[1] in ["fetch", "update", "pull"]:
- l = fetch_rss()
- print l
+ l = fetch_dtc.getquotes()
last = last_inserted()
for q in l:
if q["id"] > last:
- insert(q)
+ try:
+ insert(q)
+ except psycopg2.IntegrityError as err:
+ print "Bim %s\nl,last_inserted = %s, %s" % (err, l, last_inserted)
else:
id = int(sys.argv[1])
try: