Avec les sources pour le dico

[bots/hung.git] / convert_dico.py
diff --git a/convert_dico.py b/convert_dico.py

new file mode 100644 (file)

index 0000000..1e5d187
--- /dev/null
+++ b/convert_dico.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+# -*- coding:utf8 -*-
+
+
+import re
+
+# Read the whole dump at once; "with" closes the file even if read() fails.
+with open("dico_for_python.txt") as f:
+    t=f.read()
+# One (word, definition) tuple per match; "." never crosses a newline.
+l=re.findall('"(.*)","(.*)"',t)
+
+def maju(texte):
+    """Return ``texte`` upper-cased, without diacritics or ligatures.
+
+    ``texte`` is a UTF-8 encoded byte string; text that is already decoded
+    is accepted as well.  The result is a unicode string in which every
+    accented letter is reduced to its base letter and the ligatures
+    "œ"/"æ" are spelled out, e.g. "Cœur-brisé" gives u"COEUR-BRISE".
+
+    Unicode decomposition is used instead of a hand-written table of
+    accented vowels so that letters such as "ç" or "ñ" are reduced too
+    ("français" gives u"FRANCAIS", not u"FRANÇAIS").
+    """
+    # Imported here so the function does not depend on the file header.
+    import unicodedata
+
+    if isinstance(texte,(bytes,bytearray)):
+        texte=texte.decode("utf8")
+    texte=texte.lower()
+
+    # Ligatures have no canonical decomposition: spell them out by hand.
+    for avant,apres in ((u"œ",u"oe"),(u"æ",u"ae")):
+        texte=texte.replace(avant,apres)
+
+    # NFD splits an accented letter into base letter + combining marks;
+    # dropping the marks (non-zero combining class) keeps the bare letter.
+    decompose=unicodedata.normalize("NFD",texte)
+    sans_accents=u"".join(
+        c for c in decompose if not unicodedata.combining(c))
+
+    # Recompose what is left, so characters that merely decompose (e.g.
+    # Hangul syllables) come out in their usual composed form.
+    sans_accents=unicodedata.normalize("NFC",sans_accents)
+    return sans_accents.upper()
+
+# Keep single words of 8+ letters, counted on the decoded text: the raw
+# entries are UTF-8 bytes, where one accented letter spans several bytes
+# ("replace" keeps a malformed entry from aborting the filter).
+l=[[maju(i[0]),i[1].replace("\\n","; ")] for i in l
+    if len(i[0].decode("utf8","replace"))>=8 and " " not in i[0]]
+# Preview the first entries; slicing copes with fewer than three of them.
+for i in l[:3]:
+    print i
+with open("mots.txt.out","w") as f1:
+    f1.writelines([(i[0]+"\n").encode("utf8") for i in l])
+with open("definitions.txt.out","w") as f2:
+    f2.writelines([i[1]+"\n" for i in l])