# tradinter.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot goes over multiple pages of the home wiki and interwikifies the
translations found in {{-trad-}} sections: links to words that exist on the
foreign-language wiktionary become {{trad|lang|word}} templates, and trad
templates pointing at missing words are turned back into plain [[links]].

Don't forget to set ftout to your current list of words; see below for a
line that looks like:
    ftout = open('/home/cmillet/wikitruks/wiktio/all/2005-12-14.txt', 'r')

This script understands various command-line arguments:

    -start:  used as -start:page_name, specifies that the robot should go
             alphabetically through all pages on the home wiki, starting
             at the named page.

    -file:   used as -file:file_name, read a list of pages to treat from
             the named textfile.  Page titles should be enclosed in
             [[double-square brackets]].

    -ref:    used as -ref:page_name, specifies that the robot should
             touch all pages referring to the named page.

    -cat:    used as -cat:category_name, specifies that the robot should
             touch all pages in the named category.

All other parameters will be regarded as a page title; in this case, the
bot will only touch a single page.
"""
import wikipedia, wiktionary, pagegenerators, catlib
import sys
import re

# Header template that opens a translations section on fr.wiktionary.
tradmsg = "{{-trad-}}"

'''
listelng =  # retrait de dog : pas de wiktionnaire dans cette langue
nowiktiolng =
'''

# Wiktionaries that make the distinction between "this" and "This"
# (case-sensitive first letter).  The list at
# http://meta.wikimedia.orghttps://dictious.com/fr/Help:Page_name is not really up to date.
# NOTE(review): the original list literal was lost when this file was
# mangled; the codes below are a plausible reconstruction of the
# case-sensitive wiktionaries of that era -- verify before running.
nocaplng = ['af', 'ar', 'bg', 'br', 'ca', 'cs', 'da', 'de', 'el', 'en',
            'eo', 'es', 'et', 'fi', 'fr', 'he', 'hu', 'id', 'is', 'it',
            'ja', 'ko', 'la', 'nl', 'no', 'pl', 'ro', 'ru', 'sk', 'sl',
            'sv', 'tr', 'uk', 'vi', 'zh']
# Wiktionaries I checked that still capitalize their entries:
#   ln -- pt

# ftout MUST BE SET correctly: one "lang:word" line per known entry.
ftout = open('/home/cmillet/wikitruks/wiktio/all/2005-12-14.txt', 'r')
lst_mots = ftout.readlines()
ftout.close()

wikipedia.setAction(u'interwikification des traductions (modèle trad)')

# If the text matches "{{-" or "[[" at the start of a line, we have entered
# another section (or reached the end of the translation list):
# if ( re.compile("^(\{\{-|\[\[)",re.M).match(newtext,curidx) ):
alors on entre dans une autre section, ou fin de liste # if ( re.compile("^(\{\{-|\[\[)",re.M).match(newtext,curidx) ): class TouchBot: def __init__(self, generator, acceptall = False): self.generator = generator self.acceptall = acceptall def run(self): for page in self.generator: try: wikipedia.output('page: %s' % page.title()) lapage = page.get() lapagenew = page.get() # comme newtext, mais avec les commentaires #enlevement des <!-- --> p = re.compile( u"\<\!\-\-(.*?)\-\-\>", re.DOTALL | re.MULTILINE) oldtext = p.sub(u"", lapage) # wikipedia.output( oldtext ) # il faut agir ici... newtext = oldtext curidx = 0 while ( newtext.find(tradmsg, curidx) != -1 ): curidx = newtext.find(tradmsg,curidx) + len(tradmsg) sectiontrad=1 while (sectiontrad == 1): # return original_text # (eventuellement un ":") un *, un espace, un {{code de langue}}, puis des liens, puis retour a la ligne # print "sectrad : %d"%sectiontrad result = re.compile("(^:?\* *\{\{(\w*?)\}\}(.*?\n))",re.MULTILINE).search(oldtext,curidx) if not result: # ne matche plus sectiontrad = 0 else: ligneTotale = result.group(1) lang = result.group(2) # print "curidx %d lastidx %d"%(curidx,result.end(2)) resteAnalyse = result.group(3) newLine = ligneTotale # wikipedia.output(u'reste : %s'%ligneAnalyse) listeT = re.findall("\{\{trad\|\w*?\|(.*?)\}\}" , resteAnalyse ) # print listeT for eltT in listeT : # on ne sait pas traiter si il y a un # if '#' in eltT: continue wikipedia.output(u'recherche de "%s:%s"'%(lang,eltT) ) #chercher ce mot sur le wiktionnaire lang #page_otre = wiktionary.WiktionaryPage(lang,eltT) # site_otre = wikipedia.getSite(code = lang, fam = 'wiktionary') # page_otre = wikipedia.Page(site_otre, eltT) if ( lang not in nocaplng ) : wikipedia.output(u'--------avantcap %s'%eltT) eltTsearch = eltT.upper() + eltT wikipedia.output(u'--------aprescap %s'%eltTsearch) else: eltTsearch = eltT tosearch = u'%s:%s\n'%(lang,eltTsearch) tosearch = tosearch.encode('utf-8') if not ( tosearch in lst_mots ): # 
position = lst_mots.index( tosearch ) # if not page_otre.exists(): # if not ( position < 0 ): print "DEWIKIFICATION" new = u']'%eltT old = u'{{trad|%s|%s}}'%(lang,eltT) newLine = newLine.replace(old , new) #newtext = newtext.replace(old,new) listeT = re.findall("\\]" , resteAnalyse ) for eltT in listeT : if '#' in eltT: continue if eltT == ':' : # lien du type ] à supprimer old = u']'%eltT new = u'' newLine = newLine.replace(old , new) else: wikipedia.output(u'recherche de "%s:%s"'%(lang,eltT) ) #chercher ce mot sur le wiktionnaire lang #page_otre = wiktionary.WiktionaryPage(lang,eltT) # site_otre = wikipedia.getSite(code = lang, fam = 'wiktionary') # page_otre = wikipedia.Page(site_otre, eltT) if ( lang not in nocaplng ) : wikipedia.output(u'--------avantcap %s'%eltT) eltTcap = eltT.upper() + eltT wikipedia.output(u'--------aprescap %s'%eltTcap) else: eltTcap = eltT tosearch = u'%s:%s\n'%(lang,eltT) tosearch = tosearch.encode('utf-8') if ( tosearch in lst_mots ): # position = lst_mots.index( '%s:%s'%(lang,eltT) ) # if not page_otre.exists(): # if not ( position < 0 ): print "INTERWIKIFICATION" old = u']'%eltT new = u'{{trad|%s|%s}}'%(lang,eltT) newLine = newLine.replace(old , new) # wikipedia.output('old : %s , new : %s , newline : %s'%(old , new , newLine)) #newtext = newtext.replace(old,new) # find de l'analyse de la ligne : on avance d'un pas newtext = newtext.replace(ligneTotale, newLine) lapagenew = lapagenew.replace(ligneTotale, newLine) curidx = result.end(3) #fin du if langue pas valide #fin du if not result #fin du while qu'on est dans la section de traduction # fin du while {{-trad-}} # on uploade le texte if newtext == oldtext: wikipedia.output('No changes were necessary in %s' % page.title()) else: wikipedia.output(u'>>> %s <<<' % page.title()) wikipedia.showDiff(lapage, lapagenew) if not self.acceptall: choice = wikipedia.inputChoice(u'Do you want to accept these changes?', , , 'N') if choice in : self.acceptall = True if self.acceptall or choice in : 
print "put" page.put(lapagenew) except wikipedia.NoPage: print "Page %s does not exist?!?!"%page.aslink() except wikipedia.IsRedirectPage: pass except wikipedia.LockedPage: pass def main(): #page generator gen = None pageTitle = for arg in sys.argv: arg = wikipedia.argHandler(arg, 'touch') if arg: if arg.startswith('-start:'): gen = pagegenerators.AllpagesPageGenerator(arg) elif arg.startswith('-ref:'): referredPage = wikipedia.Page(wikipedia.getSite(), arg) gen = pagegenerators.ReferringPageGenerator(referredPage) elif arg.startswith('-links:'): linkingPage = wikipedia.Page(wikipedia.getSite(), arg) gen = pagegenerators.LinkedPageGenerator(linkingPage) elif arg.startswith('-file:'): gen = pagegenerators.TextfilePageGenerator(arg) elif arg.startswith('-cat:'): cat = catlib.Category(wikipedia.getSite(), arg) gen = pagegenerators.CategorizedPageGenerator(cat) else: pageTitle.append(arg) if pageTitle: page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle)) gen = iter() if not gen: wikipedia.showHelp('touch') else: preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = TouchBot(preloadingGen) bot.run() if __name__ == "__main__": try: main() finally: wikipedia.stopme()