User:AutoFormat/code/langcodes

Hello, you have come here looking for the meaning of the word User:AutoFormat/code/langcodes. In DICTIOUS you will not only get to know all the dictionary meanings for the word User:AutoFormat/code/langcodes, but we will also tell you about its etymology, its characteristics and you will know how to say User:AutoFormat/code/langcodes in singular and plural. Everything you need to know about the word User:AutoFormat/code/langcodes you have here. The definition of the word User:AutoFormat/code/langcodes will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of User:AutoFormat/code/langcodes, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.



#!/usr/bin/python
# -*- coding: utf-8  -*-
# wikipath en wiktionary User:AutoFormat/code/langcodes


"""
This code looks for language code templates on en.wiktionary: the list of
candidate templates comes from the API (live DB), their content from an XML dump.

Writes the AF (AutoFormat) control file.

No command line arguments.

Writes reports.
"""

import wikipedia
import xmlreader
import sys
import re
import pickle
import xmldate
import socket

def safe(s):
    """Return an ASCII-only rendering of *s* that is safe to print.

    The value is pickled with the text protocol (protocol 0), which escapes
    non-ASCII characters, and the pickle framing is then cut away: the
    one-byte type opcode at the front and the five-byte trailer
    (newline, memo "p0", newline, STOP ".") at the end.  For a text
    (unicode) string this leaves just the escaped text.

    NOTE(review): the original computed the pickle's length but returned the
    whole pickle, newlines included.  Other bracketed expressions in this
    file were visibly lost in publication, so the slice below is a
    reconstruction -- confirm against the original script.
    """
    # Protocol 0 is requested explicitly so the framing being sliced off is
    # the same regardless of the interpreter's default pickle protocol.
    pickled = pickle.dumps(s, 0)
    return pickled[1:-5]

def skey(s):
    """Return the sort key that puts language codes in preferred order.

    Codes are ordered by length first (zero-padded to two digits) and then
    alphabetically, so every two-letter code precedes every three-letter one.
    """
    # The one exception: 'zh' is keyed as if it were longer, so that it
    # sorts after '03cmn'.
    if s == 'zh':
        return '04zh'

    return '%02d%s' % (len(s), s)

def main():
    """Rebuild the AutoFormat language-code table.

    Steps:
      1. Page through Category:Language_templates via the live API and
         collect every member whose title looks like a language-code
         template.
      2. Scan the local XML dump ("en-wikt.xml") for those templates and
         take what is left of each template's text, after stripping the
         link parameter and non-included parts, as the language name.
      3. Build a wiki table mapping each language to its code(s) and save
         it to User:AutoFormat/Languages.

    Takes no arguments and returns nothing; progress is printed to stdout.
    """

    socket.setdefaulttimeout(30)

    # make sure we are logged in
    site = wikipedia.getSite()
    site.forceLogin()
    wikipedia.setAction('writing report')

    prefix = 'Template:'

    Lcodes = {}         # language name -> list of codes
    poscodes = set()    # candidate template titles not yet seen in the dump

    # NOTE(review): the character classes in retitle, recmatch, reccont and
    # relink were missing from the published copy of this script (leaving
    # invalid patterns such as '(*)').  They are reconstructed here from
    # context -- confirm against the original.
    retitle = re.compile(r'title="([^"]*)"')
    recmatch = re.compile(r'Template:[-a-z]{2,10}$')
    reccont = re.compile(r'cmcontinue="([^"]*)"')
    k = 0

    # get category from live wikt (too much variation)

    ccont = '!'

    while ccont:

        print("getting cat from %s" % ccont)

        cats = site.getUrl("/w/api.php?action=query&list=categorymembers"
                           "&cmtitle=Category:Language_templates&cmlimit=1000"
                           "&cmcontinue=" + ccont + "&format=xml")

        for title in retitle.findall(cats):

            if not recmatch.match(title):
                print("skipped %s" % repr(title))
                continue

            poscodes.add(title)
            k += 1

        # keep going for as long as the reply carries a continuation marker
        mo = reccont.search(cats)
        if mo:
            ccont = mo.group(1)
        else:
            ccont = ''

    print("possible templates found %d" % k)

    # now get content from XML scan

    # {{{l|[[}}} / {{{l|]]}}} style optional-link parameters
    relink = re.compile(r"\{\{\{l\|[\[\]']*\}\}\}")
    # everything from the first <noinclude onwards
    reincl = re.compile(r'<noinclude.*$', re.S)
    # if there is an <onlyinclude> section, keep just its content
    reonly = re.compile(r'^.*<onlyinclude>(.*)</onlyinclude>.*$', re.S)

    # get XML dump
    dump = xmlreader.XmlDump("en-wikt.xml")

    for entry in dump.parse():
        title = entry.title

        if title not in poscodes:
            continue
        poscodes.remove(title)

        # the code is the template name without its namespace prefix;
        # without this the special cases below could never match
        code = title[len(prefix):]

        text = entry.text

        text = relink.sub('', text)
        text = reincl.sub('', text)
        text = reonly.sub(r'\1', text)

        lang = text

        # special case(s)

        if code == 'see': lang = 'Seneca'

        # bugs, fixed in next XML, 4.5.10
        if code == 'oun': lang = '!O!ung'
        if code == 'bdf': lang = 'Biage'

        # got one!

        print("%s %s" % (safe(code), safe(lang)))

        # several codes may share one language name
        Lcodes.setdefault(lang, []).append(code)

    # not found?

    if poscodes:
        print("not found: %s" % repr(poscodes))

    # report for AF control file:

    rows = ['\n{| class="prettytable"\n',
            '|-\n| | codes\n| |Language\n']

    for lang in sorted(Lcodes):
        # codes of this language only, in preferred order
        codes = u','.join(sorted(Lcodes[lang], key=skey))
        rows.append("|-\n| " + codes + '||' + lang + '\n')

    rows.append("|}\n")
    report = ''.join(rows)
    wikipedia.setAction('writing AutoFormat language table')

    # write the AutoFormat table page

    reportpage = wikipedia.Page(site, 'User:AutoFormat/Languages')
    try:
        # the current text itself is not used; a missing page is only reported
        reportpage.get(sysop = True)
    except wikipedia.NoPage:
        print("No present report for %s" % reportpage.aslink())

    # file the report
    reportpage.put(report)


# Script entry point: run the report only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # always call wikipedia.stopme(), even if main() raised
        wikipedia.stopme()