]> gitweb.pimeys.fr Git - bots/hung.git/blob - convert_dico.py
.gitignore += *.out
[bots/hung.git] / convert_dico.py
1 #!/usr/bin/python
2 # -*- coding:utf8 -*-
3
4
5 import re
6
# Load the whole dictionary dump. The ``with`` block guarantees the file is
# closed even if read() raises.
with open("dico_for_python.txt") as f:
    t = f.read()

# Collect every "first","second" pair of quoted fields. '.' does not match a
# newline, so each match stays on one line; both groups are greedy.
l = re.findall(r'"(.*)","(.*)"', t)
12
# Lowercase accented letters and ligatures mapped to their plain replacement.
_REMPLACEMENTS = {
    u"á": u"a",
    u"à": u"a",
    u"â": u"a",
    u"ä": u"a",
    u"é": u"e",
    u"è": u"e",
    u"ê": u"e",
    u"ë": u"e",
    u"í": u"i",
    u"ì": u"i",
    u"î": u"i",
    u"ï": u"i",
    u"ó": u"o",
    u"ò": u"o",
    u"ô": u"o",
    u"ö": u"o",
    u"ú": u"u",
    u"ù": u"u",
    u"û": u"u",
    u"ü": u"u",
    u"ý": u"y",
    u"ỳ": u"y",
    u"ŷ": u"y",
    u"ÿ": u"y",
    u"œ": u"oe",
    u"æ": u"ae",
}

# Same mapping keyed by code point, as required by ``translate``; built once
# instead of on every call.
_TABLE_REMPLACEMENTS = {
    ord(avant): apres for avant, apres in _REMPLACEMENTS.items()
}


def maju(texte):
    """Return *texte* in upper case with the listed accents/ligatures removed.

    The text is lower-cased, every character present in ``_REMPLACEMENTS`` is
    replaced by its plain equivalent (``œ`` -> ``oe``, ``æ`` -> ``ae``), and
    the result is upper-cased. Characters not listed in the table (for
    example ``ç``) are only upper-cased.

    Args:
        texte: UTF-8 encoded bytes, or an already decoded text string.

    Returns:
        The normalised text as a unicode string.

    Raises:
        UnicodeDecodeError: if *texte* is bytes that are not valid UTF-8.
    """
    # ``bytes`` is ``str`` on Python 2, so byte input is decoded exactly as
    # before; decoded text is accepted as-is instead of being rejected, and
    # the Python 2-only ``unicode`` builtin is no longer needed.
    if isinstance(texte, bytes):
        texte = texte.decode("utf8")
    # Single pass over the text instead of one ``replace`` per table entry.
    return texte.lower().translate(_TABLE_REMPLACEMENTS).upper()
46
# Build [normalised word, "word : definition"] pairs for the entries kept.
# Literal backslash-n sequences inside a definition are turned into "; ".
# NOTE(review): the entries are UTF-8 byte strings (they are .decode()d
# below), so len() counts bytes rather than letters -- an accented word shorter
# than 8 letters can pass the filter. Confirm whether that is intended.
l = [
    [maju(mot), mot + " : " + definition.replace("\\n", "; ")]
    for mot, definition in l
    if len(mot) >= 8
    and " " not in mot
    # Words ending with a hyphen caused failures; they are not interesting
    # anyway, so they are dropped.
    and not mot.endswith("-")
]

# One word per line in the first file and the matching definition per line in
# the second. ``with`` closes both files even if a write fails.
with open("mots.txt.out", "w") as f1, open("definitions.txt.out", "w") as f2:
    f1.writelines((mot + "\n").encode("utf8") for mot, _ in l)
    # "note"/"Note" are rewritten as "nøte"/"Nøte" in the definitions; the
    # reason is not visible in this file.
    f2.writelines(
        ligne.decode("utf8")
        .replace(u"note", u"nøte")
        .replace(u"Note", u"Nøte")
        .encode("utf8") + "\n"
        for _, ligne in l
    )
58 f2.close()