#!/usr/bin/python
# -*- coding: utf-8 -*-
# wikipath en wiktionary User:Interwicket/code/iwlinks
import wikipedia
import re
# Spans of wikitext whose contents must be ignored when looking for
# interwiki links.  The pattern is applied with re.sub() to blank the spans
# out before reiwiki is run over the remaining text.
# NOTE(review): this alternation was damaged in extraction -- only
# "<nowiki>.*?||" survived.  The closing </nowiki> and the HTML-comment
# branch are the obvious restorations; the third branch (<pre>) is a
# guess at the lost alternative and should be confirmed.
renotags = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->|<pre>.*?</pre>',
                      re.IGNORECASE | re.DOTALL)

# One interwiki link of the form [[code:title]].
#   group 1 -- language code: 2 to 10 lowercase letters or hyphens
#   group 2 -- title: anything up to the closing brackets, excluding
#              square brackets and newlines
# NOTE(review): both character classes were stripped in extraction and are
# reconstructed from the surviving fragments of the pattern.
reiwiki = re.compile(r'\[\[([a-z-]{2,10}):([^\[\]\n]+)\]\]')
def getiwlinks(text, flws):
    """Return the interwiki links found in *text* as a {code: title} dict.

    text -- wikitext to scan
    flws -- container keyed by language code; the entry for a code is read
            for its ``lockedwikt`` and ``deletecode`` attributes

    Spans matched by ``renotags`` are removed before scanning, so links
    inside them are not reported.  A link is skipped when its code is not
    in *flws*, or when the wikt for that code is locked and is not flagged
    ``deletecode``.  If a code occurs more than once, the last title wins.
    """
    # NOTE(review): the '' replacement string and the [code] subscripts were
    # lost when this file was extracted; they are reconstructed here.
    mt = renotags.sub('', text)
    links = {}

    for code, title in reiwiki.findall(mt):
        if code not in flws:
            continue
        flw = flws[code]
        if flw.lockedwikt and not flw.deletecode:
            continue
        links[code] = title

    return links
def replaceiwlinks(text, links, flw, flws):
    """Return *text* with its interwiki links replaced by those in *links*.

    text  -- wikitext of the entry
    links -- {code: title} dict of the links wanted; it is copied, so the
             caller's dict is not modified
    flw   -- object for the wikt being edited; read for
             ``site.interwiki_putfirst()``, ``oneline`` and ``attop``
    flws  -- container keyed by language code; entries are read for their
             ``lockedwikt`` and ``deletecode`` attributes

    Every existing link whose code is in *flws* is removed from the text
    (together with trailing whitespace).  The new link block is then added
    at the top or bottom according to ``flw.attop``, joined by spaces or
    newlines according to ``flw.oneline``.  Codes returned by
    ``interwiki_putfirst()`` come first, the rest follow in sorted order.
    """
    # NOTE(review): the '' literals, the [code] subscripts, the "[[" string
    # pieces and the middle of the removal pattern were lost in extraction;
    # they are reconstructed here from the surviving fragments.
    links = links.copy()  # private copy (shallow, okay)

    # proceed as in getiwlinks in finding old links, but different action
    # duplicate codes are silently elided (probably not best, but as before)
    mt = renotags.sub('', text)

    for code, title in reiwiki.findall(mt):
        if code not in flws:
            continue
        old = r'\[\[' + re.escape(code) + ':' + re.escape(title) + r'\]\]\s*'
        text = re.sub(old, '', text)
        # no add or remove links to locked wikts (mostly harmless, but not
        # worth it); do remove explicit deletes
        target = flws[code]
        if target.lockedwikt and not target.deletecode and code not in links:
            links[code] = title

    # strip WS at bottom (and top for pl.wikt)
    text = text.strip('\n ')

    # sort if needed
    linklist = []
    pf = flw.site.interwiki_putfirst()
    if pf:
        for code in pf:
            if code in links:
                linklist.append("[[" + code + ":" + links[code] + "]]")
                del links[code]
    # remaining, or all in code order:
    for code in sorted(links):
        linklist.append("[[" + code + ":" + links[code] + "]]")

    if flw.oneline:
        ls = ' '.join(linklist)
    else:
        ls = '\n'.join(linklist)

    if flw.attop:
        newt = ls + '\n' + text
    else:
        newt = text + '\n\n' + ls

    return newt
if __name__ == "__main__":
    # Manual smoke test: fetch one page, read its interwiki links, rebuild
    # the link block, show the diff, and save the result with page.put().
    # NOTE(review): the subscripts on flws and links in this section were
    # lost in extraction and are reconstructed; the stray "</nowiki>" that
    # trailed the last statement was markup residue and has been dropped.
    from reciprocal import flws

    code = 'sw'
    title = 'cat'

    # init all the flws:
    # NOTE(review): the key originally used to reach the family could not be
    # recovered; the wikt under test is used here -- confirm.
    for lang in flws[code].site.family.langs:
        flws[lang]  # looked up only so that the entry exists

    # single-argument print() produces the same output on Python 2 and 3
    print("sh status %s locked %s" % (flws['sh'].status, flws['sh'].lockedwikt))

    # get some page, try a few things
    page = wikipedia.Page(flws[code].site, title)
    text = page.get()

    links = getiwlinks(text, flws)
    print("%s : %s" % (title, repr(links)))

    # if 'ta' in links: del links['ta']
    # links['sh'] = title

    # so following should be no-op
    if 'sh' in links:
        del links['sh']

    newt = replaceiwlinks(text, links, flws[code], flws)

    wikipedia.showDiff(text, newt)

    page.put(newt)