This simple tool examines an XML file obtained through Special:Export and prints every entry that contains more than one language header. It's useful, for example, to find out whether a certain Romanian entry with ş or ţ can be moved to a new name with ș, ț.
cat roverbs.xml | ./lang_headers.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""List pages of a Special:Export XML dump that have several language headers.

The dump is read line by line from standard input.  For every page whose
namespace matches "main" and whose text contains more than one level-2
header (``==Language==``), one line is written to standard output: the page
title followed by the header names, separated by three spaces.
"""
import re
import sys

page_tag = re.compile(r'<page>')
title_tag = re.compile(r'<title>')
# NOTE(review): the character classes in lang_tag and title_content are
# reconstructed.  The patterns previously read '==(+)==$' and
# '<title>(+):(.+)</title>', which do not compile ("nothing to repeat");
# [^=] and [^:] are the presumed intent -- confirm against real dumps.
# With [^=], deeper headers such as ===Noun=== do not match.
lang_tag = re.compile(r'==([^=]+)==$')
title_content = re.compile(r'<title>([^:]+):(.+)</title>')
title_capture = re.compile(r'<title>(.*)</title>')
comment_tag = re.compile(r'<comment>(.*)</comment>')
page_tag_end = re.compile(r'</page>')
main_string = re.compile(r'main')


def find_multilang_entries(lines):
    """Yield ``(title, languages)`` for pages with more than one language.

    ``lines`` is any iterable of text lines from an export dump.  A page is
    reported when its ``</page>`` line is reached, provided more than one
    header was collected and the namespace matches ``main_string``.
    ``languages`` is a new list of the captured header names, in file order.
    """
    # Initialised up front so that a title or header line appearing before
    # the first <page> cannot hit an unbound name.
    namespace = ""
    title = ""
    section = []
    for line in lines:
        if page_tag.search(line):
            # A new page starts: forget everything from the previous one.
            namespace = ""
            title = ""
            section = []
        elif title_tag.search(line):
            result = title_content.search(line)
            if result:
                # "Prefix:Name" -> the prefix is taken as the namespace.
                namespace, title = result.groups()
            else:
                result = title_capture.search(line)
                if result:
                    # No colon in the title: treated as the main namespace.
                    namespace = "main"
                    title = result.group(1)
        elif comment_tag.search(line):
            # Edit summaries are never scanned for headers.
            continue
        elif lang_tag.search(line):
            section.append(lang_tag.search(line).group(1))
        elif page_tag_end.search(line):
            # Substring search, so a prefix that merely contains "main"
            # is accepted as well as the synthetic "main" set above.
            if len(section) > 1 and main_string.search(namespace):
                yield title, list(section)


def format_entry(title, languages):
    """Return one output line (without newline) for a reported page."""
    return "   ".join([title] + list(languages))


def main(fin=None, fout=None):
    """Read a dump from ``fin`` and write the report to ``fout``.

    Both default to the process's standard streams.  ``write`` is used
    instead of ``print`` so the script runs unchanged on Python 2 and 3.
    """
    fin = sys.stdin if fin is None else fin
    fout = sys.stdout if fout is None else fout
    for title, languages in find_multilang_entries(fin):
        fout.write(format_entry(title, languages) + "\n")


if __name__ == "__main__":
    main()