This script is an adaptation of the one used on el.wiktionary to add sorting keys to Greek words. It adds the DEFAULTSORT key before the interwikis.
# -*- coding: utf-8 -*-
u"""
Much of this code was stolen from cosmetic_changes.py.
"""
__version__ = '$Id: default_kleidaTaksinomhshs.py 4260 2007-09-12 22:12:11Z wikipedian $'

import wikipedia
import pagegenerators
import string
import sys
import re

warning = u"""If you are running this bot outside en.wiktionary, please reconsider"""

# Placeholders that the framework's help output substitutes in the module
# docstring.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
    '&warning;': warning,
}

# Summary message when using this module as a stand-alone script
msg_standalone = {
    'en': u'Bot: defaultsort key',
}

# Matches an existing {{DEFAULTSORT:key}} template and captures the key.
_DEFAULTSORT_RE = re.compile(u'\\{\\{DEFAULTSORT\\:([^\\}]*)\\}\\}')

# Patterns used to strip an existing template from the page text.  The first
# also swallows the blank line that follows the template (or matches a
# template at the very end of the text); the second swallows a single
# trailing line break.
_DEFAULTSORT_WITH_BLANK_LINE = u'\\{\\{DEFAULTSORT\\:[^\\}]*\\}\\}(\r\n\r\n|$)'
_DEFAULTSORT_WITH_NEWLINE = u'\\{\\{DEFAULTSORT\\:[^\\}]*\\}\\}\r\n'

# Space-separated Greek letters carrying diacritics (plus final sigma) ...
_MTG_APO = (
    u'ά έ ή ί ϊ ΐ ό ύ ϋ ΰ ώ ς ά έ ή ί ό ύ ώ ᾴ ῄ ῴ ὰ ὲ ὴ ὶ ὸ ὺ ὼ ᾲ ῂ ῲ ᾶ ῆ ῖ ῦ ῶ ᾷ ῇ ῷ ῗ ῧ ῒ ῢ ΐ ΰ ᾳ ῃ ῳ ἀ ἐ ἠ'
    u' ἰ ὀ ὐ ὠ ᾀ ᾐ ᾠ ἄ ἔ ἤ ἴ ὄ ὔ ὤ ᾄ ᾔ ᾤ ἂ ἒ ἢ ἲ ὂ ὒ ὢ ᾂ ᾒ ᾢ ἆ ἦ ἶ ὖ ὦ ᾆ ᾖ ᾦ ἁ ἑ ἡ ἱ ὁ'
    u' ὑ ὡ ἅ ἕ ἥ ἵ ὅ ὕ ὥ ᾅ ᾕ ᾥ ἃ ἓ ἣ ἳ ὃ ὓ ὣ ᾃ ᾓ ᾣ ἇ ἧ ἷ ὗ ὧ ᾇ ᾗ ᾧ ᾰ ῐ ῠ ᾱ ῑ ῡ ῥ'
)
# ... and, position by position, the plain letter each one is replaced with.
_MTG_SE = (
    u'α ε η ι ι ι ο υ υ υ ω σ α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω ι υ ι υ ι υ α η ω α ε η'
    u' ι ο υ ω α η ω α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω α ε η ι ο'
    u' υ ω α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω α ι υ α ι υ ρ'
)

# Translation table (code point -> replacement letter) for unicode.translate.
# Built once at import time instead of on every call.
_TRANSLIT_TABLE = dict(
    (ord(apo), se)
    for apo, se in zip(_MTG_APO.split(u' '), _MTG_SE.split(u' '))
)


class EpiloghKleidaTaksinomhshsToolkit:
    """Computes and inserts the {{DEFAULTSORT:...}} key for a single page."""

    def __init__(self, site, title, exceptions=None, debug=False):
        """
        site       -- site object the page belongs to.
        title      -- callable returning the page title (the bot passes the
                      bound method page.title, and it is invoked as
                      self.title()).
        exceptions -- stored on the instance; not used by this class.
        debug      -- when true, allagh() shows a diff of its changes.
        """
        self.site = site
        self.debug = debug
        self.title = title
        # Avoid sharing one default list between instances.
        if exceptions is None:
            exceptions = []
        self.exceptions = exceptions

    def allagh(self, keimeno):
        """
        Given the wiki source text of the page, return it with an up-to-date
        DEFAULTSORT template.

        If the text already carries a DEFAULTSORT template whose key equals
        the generated one, the text is returned unchanged.  Otherwise every
        existing DEFAULTSORT template is removed and a fresh one is inserted
        right before the interwiki links.
        """
        existing = _DEFAULTSORT_RE.search(keimeno)
        if existing:
            kleidakeimenou = existing.group(1)
            protypo = self.paragwghKleidaTaksinomhshsProtypo()
            kleida = _DEFAULTSORT_RE.search(protypo).group(1)
            if kleida == kleidakeimenou:
                return keimeno
        palioKeimeno = keimeno
        # Placeholder only: addKleidaTaksinomhshsProtypo regenerates the
        # template from the title.
        protypo = u'{{DEFAULTSORT}}'
        # Remove the template together with the blank line that follows it,
        # if there is one.
        keimeno = self.removeKleidaTaksinomhshsProtypo(
            keimeno, _DEFAULTSORT_WITH_BLANK_LINE)
        keimeno = self.removeKleidaTaksinomhshsProtypo(
            keimeno, _DEFAULTSORT_WITH_NEWLINE)
        keimeno = self.addKleidaTaksinomhshsProtypo(keimeno, protypo)
        if self.debug:
            wikipedia.showDiff(palioKeimeno, keimeno)
        return keimeno

    def paragwghKleidaTaksinomhshsProtypo(self):
        """
        Build the DEFAULTSORT template for this page.

        The key is the lower-cased title with accented/breathing-marked Greek
        letters replaced by their plain counterparts and final sigma replaced
        by sigma.
        """
        parametros = self.title().lower()
        parametros = parametros.translate(_TRANSLIT_TABLE)
        # NOTE(review): the pattern of this substitution is empty in the
        # source this was recovered from, which makes it a no-op.  It
        # presumably named characters to strip from the key -- confirm
        # against the el.wiktionary original before relying on it.
        parametros = re.sub(u'', u'', parametros)
        protypo = u'{{DEFAULTSORT:' + parametros + '}}'
        return protypo

    def removeKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        u"""
        Return keimeno with every match of the regular expression protypo
        removed (used to drop an old template if it is there).
        """
        return re.sub(protypo, u'', keimeno)

    def addKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        u"""
        Insert the DEFAULTSORT template into the wikitext right before the
        interwiki links and return the new text.

        The protypo argument is ignored: the template is always regenerated
        from the page title.
        """
        site = self.site
        protypo = self.paragwghKleidaTaksinomhshsProtypo()
        separator = site.family.category_text_separator
        interwiki = wikipedia.getLanguageLinks(keimeno, insite=site)
        # Strip the interwiki links, append the template, then put the links
        # back so that they end up after the template.
        textnoiws = (wikipedia.removeLanguageLinks(keimeno.strip(), site=site)
                     + separator + protypo + separator)
        return wikipedia.replaceLanguageLinks(textnoiws, interwiki, site=site)


class EpiloghKleidaTaksinomhshsBot:
    """Runs the toolkit over every page yielded by a page generator."""

    def __init__(self, generator, exceptions=None, acceptall=False):
        """
        generator  -- iterable of page objects to process.
        exceptions -- compiled regular expressions; a page whose text matches
                      any of them is skipped.
        acceptall  -- when true, changes are saved without asking.
        """
        self.generator = generator
        self.acceptall = acceptall
        # Avoid sharing one default list between instances.
        if exceptions is None:
            exceptions = []
        self.exceptions = exceptions
        # Load default summary message.
        wikipedia.setAction(
            wikipedia.translate(wikipedia.getSite(), msg_standalone))

    def checkExceptions(self, original_text):
        """
        If one of the exceptions applies for the given text, returns the
        substring which matches the exception. Otherwise it returns None.
        """
        for exception in self.exceptions:
            hit = exception.search(original_text)
            if hit:
                return hit.group(0)
        return None

    def treat(self, page):
        """
        Process one page: compute the new text, ask for confirmation unless
        "all" was chosen earlier, and save the page if accepted.

        Missing, redirect and locked pages are reported and skipped.
        """
        try:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # The toolkit receives the bound method and calls it itself.
            ccToolkit = EpiloghKleidaTaksinomhshsToolkit(
                page.site(), page.title, debug=True)
            keimeno = page.get()
            match = self.checkExceptions(keimeno)
            # skip all pages that contain certain texts
            if match:
                wikipedia.output(u'Skipping %s because it contains %s'
                                 % (page.aslink(), match))
                return
            allages = ccToolkit.allagh(keimeno)
            if allages == keimeno:
                wikipedia.output(u"No changes for %s" % page.title())
                return
            choice = None
            if not self.acceptall:
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    self.acceptall = True
            if self.acceptall or choice in ['y', 'Y']:
                page.put(allages)
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist;!" % page.aslink())
        except wikipedia.IsRedirectPage:
            wikipedia.output("Page %s is a redirect, skipping."
                             % page.aslink())
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked?!" % page.aslink())

    def run(self):
        """Treat every page produced by the generator."""
        # The page text is fetched inside treat(), where fetch errors are
        # handled; no second, unguarded page.get() is made here.
        for page in self.generator:
            self.treat(page)


def main():
    """
    Parse the command line, build the page generator and run the bot.

    Options handled here:
      -except:TEXT   skip pages containing TEXT (a regex with -regex)
      -regex         treat -except values as regular expressions
      -nocase        match -except values case-insensitively
      -namespace:N   restrict to namespace N (number or name)
      -page[:TITLE]  work on a single page; prompts when TITLE is omitted
    Any other argument is passed to the pagegenerators factory.
    """
    # page generator
    gen = None
    PageTitles = []
    exceptions = []
    namespaces = []
    regex = False
    caseInsensitive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-except:'):
            exceptions.append(arg[len('-except:'):])
        elif arg == '-regex':
            regex = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg.startswith('-namespace:'):
            value = arg[len('-namespace:'):]
            try:
                namespaces.append(int(value))
            except ValueError:
                # Not a number: keep the namespace name as given.
                namespaces.append(value)
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to change?'))
            else:
                # Skip the "-page:" prefix.
                PageTitles.append(arg[6:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
            else:
                wikipedia.showHelp()

    # Turn the -except values into compiled regular expressions.
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if not regex:
        exceptions = [re.escape(exception) for exception in exceptions]
    exceptions = [re.compile(exception, flags) for exception in exceptions]

    if PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.showHelp()
    elif wikipedia.inputChoice(warning + u'\nDo you want to continue?',
                               ['yes', 'no'], ['y', 'N'], 'N') == 'y':
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = EpiloghKleidaTaksinomhshsBot(preloadingGen, exceptions)
        bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()