#! /usr/bin/env python3
from sys import argv
import os, json, traceback, mwparserfromhell
import regex as re
from unicodedata import name as character_name, normalize
from pywikibot import Page, Site
# File in which the title of the page to resume from is remembered between runs.
last_saved_filepath = "last_saved.txt"

# The only command-line argument is the name of a data file in wrong_script/.
if len(argv) != 2:
    raise ValueError("expected one commandline argument: filename")

print("filename:", argv[1])
# Read the whole data file up front; the context manager closes the handle,
# and the explicit encoding keeps non-ASCII titles intact on every platform.
with open("wrong_script/" + argv[1], "r", encoding="utf-8") as data_file:
    text = data_file.read()

site = Site(code="en", fam="wiktionary")
# Per-language substitution tables: each maps a character that should be
# replaced in text of that language to the character to use instead.
replacements = {
    "ar": {
        "ک":"ك", "ی":"ي",
    },
    "fa": {
        "ك":"ک", "ي":"ی", "ى":"ی",
    },
    "ota": {
        "ک":"ك", "ي":"ی", "ى":"ی",
    },
    "ps": {
        "ك":"ک",
    },
}
# Urdu reuses the Persian table.
# NOTE(review): this statement read `replacements = replacements` (a no-op)
# in the source under review; the subscripts are reconstructed from the "ur"
# entry in language_names, which no table would otherwise back — confirm.
replacements["ur"] = replacements["fa"]

# Display names used in edit summaries, keyed by language code.
language_names = {
    "ar": "Arabic",
    "fa": "Persian",
    "ota": "Ottoman Turkish",
    "ps": "Pashto",
    "ur": "Urdu",
}
# Matches any single character (newlines included, because of DOTALL) so that
# every character of a string can be looked up individually.
single_char = re.compile(".", re.DOTALL)


def fix_Arabic(text, language_code):
    """Return *text* with the substitutions for *language_code* applied.

    Every character that has an entry in ``replacements[language_code]`` is
    replaced by the mapped character; all other characters are kept as-is.

    Raises:
        KeyError: if *language_code* has no table in ``replacements``.
    """
    table = replacements[language_code]
    return single_char.sub(
        lambda match: table.get(match.group(0), match.group(0)),
        text)
def show_characters(chars):
    """Describe *chars* for an edit summary.

    Each character is rendered as a wikilink followed by its Unicode name in
    parentheses, lower-cased and without the "arabic letter " prefix (the word
    "Farsi" keeps its capital).  Entries are joined with ", "; an empty
    iterable gives an empty string.

    Raises:
        ValueError: from ``unicodedata.name`` if a character has no name.
    """
    return ", ".join(
        "[[" + char + "]] ("
        + character_name(char).lower()
            .replace("arabic letter ", "")
            .replace("farsi", "Farsi")
        + ")"
        for char in chars)
def iterate_template_data(text, skip_to_title):
    """Yield ``(title, templates)`` for each JSON record in *text*.

    *text* holds one JSON document per line.  When *skip_to_title* is not
    None, records are skipped until one whose title equals it is reached;
    that record and all later ones are yielded.  Blank lines are ignored.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks one of the expected keys.
    """
    # NOTE(review): both subscripts on `data` were missing from the source
    # under review; "title" and "templates" are reconstructions — confirm
    # them against the files in wrong_script/.
    start_processing = skip_to_title is None
    for line in text.splitlines():
        if not line.strip():
            continue
        data = json.loads(line)
        title = data["title"]
        if not start_processing:
            if title != skip_to_title:
                continue
            # Resume with the remembered page itself, not the one after it.
            start_processing = True
        yield title, data["templates"]
# NOTE(review): every subscript on a template record (`instance[...]`) was
# missing from the source under review, so the key names below are
# placeholders — confirm them against the files in wrong_script/.  They are
# read with plain subscripts so that a wrong name fails loudly with KeyError
# instead of silently skipping every template.
_TEMPLATE_TEXT_KEY = "template"
_LANGUAGE_CODE_KEY = "lang"
_LINK_TARGET_KEY = "term"
_LINK_TARGET_PARAM_KEY = "param"


def _edit_summary(corrections, languages):
    """Build the edit summary for a non-empty list of (old, new) pairs."""
    old, new = zip(*corrections)
    correction_len = 0
    if len(set(corrections)) == 1:
        # The same substitution throughout: name it once and count it.
        correction_len = len(old)
        old, new = old[:1], new[:1]
    len_str = " " + str(correction_len) + " times" if correction_len > 1 else ""
    return "correcting Arabic-script characters: replaced {} with {}{} in {}".format(
        show_characters(old),
        show_characters(new),
        len_str,
        # Sorted so that the summary does not depend on set iteration order.
        " and ".join(language_names[code] for code in sorted(languages)))


def _ask_to_save():
    """Prompt until the reply starts with y, n or q; return that letter."""
    while True:
        answer = input("> Save edit? y/n (or quit: q)\n>>> ")
        choice = answer[:1].lower()
        if choice in ("y", "n", "q"):
            return choice
        print("> Answer not recognized.")


def process_pages(text, skip_to):
    """Interactively correct wrong-script characters page by page.

    For each record from ``iterate_template_data(text, skip_to)``, every
    listed template whose link target changes under ``fix_Arabic`` is
    rewritten in the page text.  The page is only loaded once a change is
    actually needed.  The user is then shown the edit summary and asked
    whether to save.

    Returns:
        The title of the page at which the user chose to quit, so that the
        caller can resume there later, or None once all records have been
        processed (in which case the resume file is removed).
    """
    for title, templates in iterate_template_data(text, skip_to):
        print("title: [[{}]]".format(title))
        corrections = []
        languages = set()
        # Avoid loading page if no changes need to be made.
        page = None
        old_text = None

        for instance in templates:
            template_text = instance[_TEMPLATE_TEXT_KEY]
            parsed = mwparserfromhell.parse(template_text)
            try:
                template = parsed.get(0)
            except IndexError:
                print("no template")
                continue

            language_code = instance[_LANGUAGE_CODE_KEY]
            link_target = instance[_LINK_TARGET_KEY]
            link_target_param = instance[_LINK_TARGET_PARAM_KEY]
            if not (language_code and link_target and link_target_param):
                print("missing language code or term")
                continue

            if language_code in replacements:
                link_target_corrected = fix_Arabic(link_target, language_code)
            else:
                link_target_corrected = link_target
            if link_target_corrected == link_target:
                # NOTE(review): the nesting of this message was ambiguous in
                # the source under review; it is printed both for languages
                # without a table and for targets the table leaves unchanged.
                print("did not make any changes to", template_text, "automatically")
                continue

            if page is None:
                page = Page(site, title)
                old_text = page.text
            print("{} \N{RIGHTWARDS ARROW} {} ({})".format(link_target,
                                                         link_target_corrected,
                                                         language_code))
            template.add(link_target_param, link_target_corrected)
            page.text = page.text.replace(template_text, str(template))
            table = replacements[language_code]
            corrections.extend(
                (char, table[char]) for char in link_target if char in table)
            languages.add(language_code)

        if not corrections or page.text == old_text:
            print("> no changes\n")
            continue

        summary = _edit_summary(corrections, languages)
        print("> summary:", summary)
        choice = _ask_to_save()
        if choice == "q":
            print("> quitting")
            return title
        if choice == "y":
            page.save(summary=summary, minor=True, watch="watch")
        print("")

    print("done!")
    # Nothing left to resume from.  The file is absent when the user never
    # quit part-way through, which is not an error.
    try:
        os.remove(last_saved_filepath)
    except FileNotFoundError:
        pass
    return None
# Script driver: resume from the remembered page if there is one, process the
# data, and remember where the user quit.
try:
    skip_to = None
    try:
        with open(last_saved_filepath, "r", encoding="utf-8") as last_saved:
            first_line = last_saved.readline().rstrip("\n")
    except (OSError, UnicodeDecodeError):
        # No readable resume file: start from the first record.
        first_line = ""
    if first_line:
        skip_to = first_line
        print("skipping to [[{}]]\n".format(skip_to))
    else:
        print("no page to skip to")

    title = process_pages(text, skip_to)
    if title:
        # The user quit at this page; the context manager guarantees that the
        # title is flushed to disk before the script exits.
        with open(last_saved_filepath, "w", encoding="utf-8") as last_saved:
            last_saved.write(title)
except Exception as e:
    # Top-level boundary: report the failure instead of dying silently.
    print(e)
    traceback.print_exc()
    print("quitting")