User:CrowleyBot/task/1

Technical details

Maintain a list of which section types may legitimately follow which.
- "Synonyms" is usually a child of a POS (part-of-speech) section. It can also be L3.
- "Alternative forms" is usually L3. It can also be a child of a POS section.
If a section will end up at L3 after processing, it should be checked.
- Such as ស, teshem
is caused by . It is partially reverted.
This is the error report of the second batch. Pages listed in the error report are not touched.
Source code

from prelude import  *
from botaccount import *

"normal-enwikt.py"

# Log in to English Wiktionary. `mwc`, `UA`, `UN` and `PWD` are presumably
# provided by the star imports above (prelude / botaccount) -- confirm.
en = mwc.Site('en.wiktionary.org', clients_useragent=UA)
en.login(UN, PWD)

# `pn` (page names of the batch) and `n` (its length) must already exist when
# this runs; the two commented lines record how they were derived.
#pn = list(map(lambda p: p.name, epgl))
#n = len(pn)
defaultsummary = ''

# Per-page work arrays, all indexed by the page index `i` used by process1..5.
# NOTE(review): every bracketed expression below was stripped from the
# published copy of this script (the wiki export removed "[...]" spans). The
# container shapes are reconstructed from how the names are used further down;
# the exact placeholder values are a best guess -- confirm against the
# original source.
etxtl = [en.Pages[x].text() for x in pn]    # original wikitext of each page
esecl = [None for _ in pn]                  # heading-node list per page (filled by process1)
etxtn = [''] * n                            # regenerated wikitext per page (filled by process4)
todo, todo1, todo2, todo3 = [], [], [], []
summary = [defaultsummary] * n              # edit summary per page
log = []                                    # (page index, new level, title, parent title) records


@fct.total_ordering
class node:
    """One section heading of a page, usable as a node of a heading tree.

    Attributes set by the normal constructor:
        kyu     current heading level (number of '=' printed on each side).
        oldkyu  level the node was created with; never changed here.
        title   heading text.
        a, b, c integer positions supplied by the caller (presumably offsets
                into the page text -- confirm against the code that builds
                nodes).
        z       string emitted between the heading and its text.
        t       text that follows the heading.
        l, r    unused by this class; initialised to None.
        f       parent node, or None while unlinked.
        s       list of child nodes.
        tp      type code, -1 until assigned.

    Nodes compare by ``kyu`` only; ``total_ordering`` derives the remaining
    comparison operators from ``__eq__`` and ``__lt__``.
    """

    def __init__(s, kyu=0, title='', a=0, b=0, c=0, z='', t='', dummy = 0):
        """Create a heading node.

        When ``dummy`` is truthy only ``kyu`` and ``title`` are set, and
        ``title`` is set to the ``dummy`` value itself, so a non-string
        ``dummy`` yields a node that is falsy (see ``__bool__``).
        """
        if dummy:
            s.kyu = kyu
            s.title = dummy
            return
        s.kyu, s.title, s.oldkyu = kyu, title, kyu
        s.a, s.b, s.c, s.z, s.t = a, b, c, z, t
        # Each node needs its own fresh child list. The "[]" literal was
        # missing from this line, which made the whole class a syntax error.
        s.l, s.r, s.f, s.s, s.tp = None, None, None, [], -1

    def __bool__(s):
        """A node is truthy exactly when its title is a string."""
        return isinstance(s.title, str)

    def __eq__(x, y):
        """Nodes are equal when their levels are equal (titles are ignored)."""
        return x.kyu == y.kyu

    def __lt__(x, y):
        """Order nodes by level."""
        return x.kyu < y.kyu

    def __str__(s):
        """Return the heading line, e.g. ``==English==`` for level 2."""
        return "%s%s%s" % ('=' * s.kyu, s.title, '=' * s.kyu)

    def __repr__(s):
        """Return the heading line immediately followed by the node's text."""
        return "%s%s%s%s" % ('=' * s.kyu, s.title, '=' * s.kyu, s.t)

    def printtree(s, i=0):
        """Print this node and its descendants, indented two spaces per depth."""
        print('  ' * i + str(s))
        for ss in s.s:
            ss.printtree(i + 1)

def process1():
    """Split the wikitext of page ``i`` into a flat list of heading nodes.

    Reads the module-level ``etxtl[i]`` and stores the result in
    ``esecl[i]``. The list starts with a level-0 root node that holds any
    text preceding the first heading; every following node is one heading in
    page order. For each node, ``t`` is the text between the end of its
    heading match and the start of the next heading (or the end of the page).

    NOTE(review): the published copy of this function had all "[...]" spans
    stripped. The subscripts (``m[1]``, ``etxtl[i]``, ``esecs[j]`` ...) follow
    directly from the surrounding code; the two sentinel nodes and the regex
    character class are reconstructions -- confirm against the original.
    """
    # *? for non-greedy
    # On en.wikt, User:Erutuon has ensured the sameness of '=', no redundant
    # spaces and no '=' in section titles, as well as no L1.
    # Groups: 1 = opening '=' run, 2 = title, 3 = closing '=' run,
    # 4 = the newline run after the heading.
    # NOTE(review): the title's character class was lost; `[^=\n]` is inferred
    # from the comment above and keeps a title on a single line.
    rx1 = re.compile(r'^(===*)\s*([^=\n]*?)\s*(===*)\s*?(\n+)', flags=re.MULTILINE)

    def _f1(m):
        # Level is the shorter of the two '=' runs; a is the match start,
        # b and c both start at the match end (c is overwritten below), and
        # z keeps the captured newlines so they can be re-emitted verbatim.
        return node(min(len(m[1]), len(m[3])), m[2], m.start(), m.end(), m.end(), m[4])

    etxt = etxtl[i]
    # Leading root node (level 0, empty title, b == 0) and a trailing
    # sentinel whose `a` marks the end of the text so the last real heading
    # gets a proper end offset.
    esecs = [node()] + list(map(_f1, rx1.finditer(etxt))) + [node(a=len(etxt))]
    for cur, nxt in zip(esecs, esecs[1:]):
        cur.c = nxt.a
        cur.t = etxt[cur.b:cur.c]
    del esecs[-1]  # drop the sentinel
    esecl[i] = esecs

# "Root" has a different meaning in the Reconstruction namespace
# "Proverbs" and "Citations" can be 9
# Table used to classify section titles into numeric type codes.
# NOTE(review): every "[...]" span in this block was stripped from the
# published copy, so the block is not valid Python as it stands and the title
# lists themselves cannot be recovered from here. What remains shows only the
# shape: `typelst` had 15 entries (3 + 2 + 2 + 1*6 + 2 comma-separated slots),
# which matches the type codes 0..14 used by `canlink` in process3.
# Restore the literals from the original script before running.
typelst = [
    , , ,
    , ,
    , ,
    ,
    ,
    ,
    ,
    ,
    ,
    , 
]
# Lookup from section title to type code; a defaultdict(bool) yields False for
# titles that were never registered.
typedct = defaultdict(bool)
# NOTE(review): presumably `typedct[<some key>] = False` -- the subscript was
# stripped; as written this line would rebind `typedct` itself.
typedct = False
# Register every title under the index of the list that contains it.
# NOTE(review): presumably `typedct[x] = i`. Also note that this module-level
# loop binds the global name `i`, which the process* functions read as the
# current page index.
for i, l in enumerate(typelst):
    for x in l:
        typedct = i

def processt():
    """Add the title of every heading deeper than L2 on page ``i`` to ``st``.

    ``esecl``, ``i`` and ``st`` are module-level names. ``esecl[i]`` is the
    node list of the current page; the ``[i]`` subscript was missing, which
    made the loop iterate over whole per-page lists instead of nodes.
    """
    for nd in esecl[i]:
        if nd.kyu > 2:
            st.add(nd.title)

def process3():
    """Rebuild the heading tree of the current page and renumber its levels.

    Walks consecutive heading pairs, classifies the second heading of each
    pair with ``gentype`` and attaches it to an acceptable ancestor with
    ``trylink`` (which rewrites its ``kyu``). Returns True when every heading
    was placed. On the first failure it prints a diagnostic tuple, appends
    the page index to ``fail`` and the tuple to ``fail1``, and returns False.

    ``esecl``, ``typedct``, ``fail``, ``fail1`` and the page index ``i`` are
    module-level names.

    NOTE(review): all "[...]" spans were stripped from the published copy, so
    this function is not valid Python as it stands. The lost pieces are
    flagged inline; the level/type lists after ``in`` cannot be recovered
    from here and must be restored from the original script.
    """
    # NOTE(review): presumably `esecl[i]` (node list of the current page).
    esecs = esecl
    def gentype(x):
        # Assign x.tp; return False when the heading cannot be classified.
        # NOTE(review): the list of levels after `in` was stripped.
        if x.oldkyu in :
            # For these levels the type code is simply the current level.
            x.tp = x.kyu
            return True
        # NOTE(review): the list of rejected levels after `in` was stripped.
        if x.oldkyu in :
            return False
        if 'Etymology ' in x.title or 'Reconstruction ' in x.title:
            x.tp = 4
            return True
        if 'Pronunciation ' in x.title:
            x.tp = 6
            return True
        # Drop " <number>" runs from the title before the table lookup.
        key = re.sub(r' \d+', '', x.title)
        # NOTE(review): presumably `typedct[key]`; the subscript was stripped.
        x.tp = typedct
        # Caution: `== False` is also true for a type code of 0.
        if x.tp == False:
            return False
        return True
    
    def link(x, y):
        # Make y a child of x and renumber y one level below x; children of
        # the level-0 root start at level 2.
        x.s.append(y)
        y.f = x
        if x.kyu == 0:
            y.kyu = 2
        else:
            y.kyu = x.kyu + 1
    
    def canlink(x, y):
        # Allowed (parent type, child type) pairs.
        linkd = {(0, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 9), (2, 10), (2, 11), (2, 12), (2, 13), (2, 14), (3, 6), (4, 5), (4, 6), (4, 7), (4, 9), (4, 10), (4, 11), (4, 12), (4, 14), (5, 14), (6, 3), (6, 7), (6, 9), (6, 10), (6, 11), (6, 12), (6, 14), (7, 8), (7, 9), (7, 10), (7, 11)}
        return (x.tp, y.tp) in linkd
    
    def trylink(x, y, h):
        # Climb from x through its ancestors (via .f) while the candidate is
        # truthy and at level >= h; link y under the first one that may
        # legally contain it.
        while x and x.kyu >= h:
            if canlink(x, y):
                link(x, y)
                return True
            x = x.f
        return False
    
    # NOTE(review): presumably `esecs[0].tp = 0` (type of the root node).
    esecs.tp = 0
    # x is the previous heading (already placed), y the one being placed;
    # p and q are their positions in the list.
    for ((p, x), (q, y)) in its.pairwise(enumerate(esecs)):
        if not gentype(y):
            print(("gentype", i, p, q, str(x), str(y)))
            fail.append(i)
            fail1.append(("gentype", i, p, q, str(x), str(y)))
            return False
        # NOTE(review): both type lists after `in` were stripped.
        if (x.tp == 4 and y.tp in ) or (x.tp == 6 and y.tp in ):
            # Etymology n and Pronunciation n should have a son
            if trylink(x, y, 0):
                continue
        elif x.oldkyu > y.oldkyu:
            # y was originally shallower than x: find the nearest ancestor of
            # x that sits above y's level and try from there upward first;
            # failing that, try x and its ancestors strictly below z.
            z = x.f
            while z.kyu >= y.kyu:
                z = z.f
            if trylink(z, y, 0):
                continue
            if trylink(x, y, z.kyu + 1):
                continue
        elif x.oldkyu == y.oldkyu:
            # Same original level: prefer making y a sibling of x (or a child
            # of a higher ancestor), otherwise try candidates at x's level or
            # deeper, starting with x itself.
            if trylink(x.f, y, 0):
                continue
            if trylink(x, y, x.kyu):
                continue
        else:
            # y was originally deeper than x: try x, then its ancestors.
            if trylink(x, y, 0):
                continue
        # No acceptable parent was found for y.
        print(("resolve", i, p, q, str(x), str(y)))
        fail.append(i)
        fail1.append(("resolve", i, p, q, str(x), str(y)))
        return False
    
    return True

def process4():
    """Build the edit summary and the regenerated wikitext for page ``i``.

    For every node whose level changed, records an ``L<old> -> L<new>: title``
    entry for the summary and appends ``(i, new level, title, parent title)``
    to ``log``. If a changed heading originally sat two or more levels below
    the heading that is now its parent, the summary is replaced by a
    ``Fix L<child> after L<parent>. `` prefix (the last such heading wins).
    The level changes are then appended to ``summary[i]`` and the page text
    is re-serialised into ``etxtn[i]``.

    ``esecl``, ``summary``, ``etxtn``, ``log`` and ``i`` are module-level
    names. The ``[]`` literal and the ``[i]`` subscripts were missing from
    the published copy and are restored here.
    """
    changes = []
    for nd in esecl[i]:
        if nd.kyu == nd.oldkyu:
            continue
        changes.append('L%d -> L%d: %s' % (nd.oldkyu, nd.kyu, nd.title))
        log.append((i, nd.kyu, nd.title, nd.f.title))
        if nd.oldkyu - nd.f.oldkyu >= 2:
            summary[i] = 'Fix L%d after L%d. ' % (nd.oldkyu, nd.f.oldkyu)
    summary[i] += ', '.join(changes)
    # Each node serialises as heading line + newline run + text; the root
    # node has level 0 and an empty title, so it contributes only its text.
    etxtn[i] = ''.join(str(nd) + nd.z + nd.t for nd in esecl[i])

def process5():
    """Save the regenerated text of page ``i`` with its edit summary.

    Passes the page object, the new text, the summary and the ``fail`` list
    to ``tryedit``, which is defined outside this section.

    NOTE(review): the subscripts were stripped from the published copy;
    ``en.Pages[pn[i]]``, ``etxtn[i]`` and ``summary[i]`` are restored from
    the way these names are used elsewhere in the script -- confirm.
    """
    tryedit(en.Pages[pn[i]], etxtn[i], summary[i], fail=fail)
User:CrowleyBot/task/1

Technical details

Source code

Wikious

Boobota

Sagapedia