User:AutoFormat/code/contexts

Hello, you have come here looking for the meaning of the word User:AutoFormat/code/contexts. In DICTIOUS you will not only get to know all the dictionary meanings for the word User:AutoFormat/code/contexts, but we will also tell you about its etymology, its characteristics and you will know how to say User:AutoFormat/code/contexts in singular and plural. Everything you need to know about the word User:AutoFormat/code/contexts you have here. The definition of the word User:AutoFormat/code/contexts will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of User:AutoFormat/code/contexts, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.



#!/usr/bin/python
# -*- coding: utf-8  -*-
# wikipath en wiktionary User:AutoFormat/code/contexts


"""
This code looks for contexts formatted as ('' '') on definition lines

Writes AF control file, context templates list, context exceptions list

No command line arguments.

writes report
"""

import wikipedia
import xmlreader
import sys
import re
import pickle
import xmldate
from mwapi import getwikitext

def safe(s):
    """Return ``s`` serialized with ``pickle.dumps``.

    Callers in this file pass titles and other strings through this
    function before handing them to ``print``.

    s -- any picklable object
    Returns the pickled representation produced by ``pickle.dumps(s)``.
    """
    # NOTE(review): the previous version also computed len() of the pickled
    # data into an unused local.  That suggests the result was once sliced
    # before being returned -- confirm against the original wiki source.
    return pickle.dumps(s)

# parse spec
#
# Patterns used by respec() to rewrite the key=value parameters found on the
# first line of a context label template into readable text.
#
# NOTE(review): several of these patterns are not valid regular expressions
# as written.  Groups such as `(*)` and `(+)` apply a quantifier to nothing,
# and relab1 has more closing than opening parentheses, so re.compile()
# raises re.error when this module is imported.  It looks as if the
# bracketed character classes were stripped when the page was extracted;
# restore them from the original wiki source.  respec() refers to group 3 of
# relab1 and group 2 of recc, which the restored patterns must provide.

relab1 = re.compile(r'label=(*)\]+)\]\](*)\|')
# NOTE(review): as written this only captures a run of `]` characters.
relab2 = re.compile(r'label=(]+)\|')
retc = re.compile(r'topcat=(+)\|')
repc = re.compile(r'poscat=(+)\|')
rerc = re.compile(r'regcat=(+)\|')
recc = re.compile(r'()cat=(+)\|')
relang = re.compile(r'lang=\{\{#if:\{\{\{lang\|}}}\|\{\{\{lang}}}\|(+)}}\|')
# an underscore immediately followed by a pipe; respec() reports it as "(space)"
respace = re.compile(r'_\|')
retcat = re.compile(r'tcat=(+)\|')

def respec(spec):
    """Rewrite the first line of a context label template as a summary.

    spec -- the template's first line of wikitext

    The leading ``{{context {{{sub|}}}|`` call and a trailing
    ``<noinclude>`` are removed, then each recognised ``key=value|``
    parameter is replaced by a readable ``name:value, `` fragment using the
    module-level patterns (relab1, relab2, retc, repc, rerc, recc, relang,
    respace).  A line that does not contain ``{{context`` at all becomes
    "(not a context label)".

    Returns the summary string.  If any ``{`` is left over, the result is
    wrapped in ``<nowiki>...</nowiki>``.
    """
    prefix = '{{context {{{sub|}}}|'
    suffix = '<noinclude>'

    # NOTE(review): the previous code assigned spec to itself in both guarded
    # branches, which does nothing.  Removing the matched prefix/suffix is
    # the evident intent of the startswith/endswith tests -- confirm against
    # the original wiki source.
    if spec.startswith(prefix):
        spec = spec[len(prefix):]
    elif '{{context' not in spec:
        spec = "(not a context label)"
    if spec.endswith(suffix):
        spec = spec[:-len(suffix)]

    # every pattern expects its parameter to be terminated by a pipe
    spec += '|'
    spec = relab1.sub(r'label:\1]\3, ', spec)
    spec = relab2.sub(r'label:\1, ', spec)
    spec = retc.sub(r'topic category:\1, ', spec)
    spec = repc.sub(r'POS category:\1, ', spec)
    spec = rerc.sub(r'regional category:\1, ', spec)
    spec = recc.sub(r'\1category:\2, ', spec)
    spec = relang.sub(r'default language:\1, ', spec)
    spec = respace.sub(r'(space), ', spec)
    spec = spec.strip(' ,|')

    # Leftover template syntax: wrap it so it is not expanded on the report
    # page.  The closing tag was missing before, leaving <nowiki> unclosed.
    if '{' in spec:
        spec = '<nowiki>' + spec + '</nowiki>'
    return spec

def main():
   """Scan the en.wiktionary XML dump and write three context reports.

   Logs in, walks every entry of ``en-wikt.xml``, collects context label
   templates, redirects and the contexts found on definition lines, then
   writes the pages ``User:AutoFormat/Contexts``,
   ``User:Robert Ullmann/Context labels`` and
   ``User:Robert Ullmann/Contexts``.  Progress is printed as it goes.

   Takes no arguments and returns nothing.

   NOTE(review): this copy of the function is not valid Python as it
   stands.  Several statements have an empty right-hand side or an
   unbalanced bracket, and dictionaries created at the top are later rebound
   to plain strings.  It looks as if bracketed text (subscripts, ``[[...]]``
   literals, regex character classes) was stripped when the page was
   extracted.  The affected spots carry NOTE(review) comments; restore them
   from the original wiki source before running.  The code also uses
   Python 2 syntax (print statements, ``except X, name``, ``unicode``).
   """

   # make sure we are logged in
   site = wikipedia.getSite()
   site.forceLogin()
   wikipedia.setAction('writing report')
   # get XML dump
   dump = xmlreader.XmlDump("en-wikt.xml")
   # running totals: dump entries read, entries that reach the context scan,
   # and individual contexts counted
   entries = 0
   words = 0
   ctxs = 0
   # accumulators filled while scanning and read back when building reports
   Contexts = { }
   Examples = { }
   Templates = { }
   Redirs = { }
   First = { }
   Specs = { }
   Date = { }
   Cats = { }
   Bad = { }
   # a definition line ("# ...") whose text starts with a bracketed context
   recontext = re.compile(r"^# *(\(|\)|\{\{italbrac\|)(.+?)(\(|\)|}})", re.M)
   # NOTE(review): reredir has no capture group although group(1) is read
   # below, recats has an unmatched ")" and will not compile, and reiwiki as
   # written matches the literal text "{2,9}:".  Presumably all three lost
   # bracketed parts in extraction -- confirm.
   reredir = re.compile(r"#redirect\s*\\]", re.I)
   recats = re.compile(r"\]*)", re.I)
   reiwiki = re.compile(r'\{2,9}:.+\]\]')
   for entry in dump.parse():
       text = entry.text
       title = entry.title
       entries += 1
       if entries % 10000 == 0:
           print "%d entries, %d words, %d contexts" % (entries, words, ctxs)
       # skip redirects, unless Templates
       # NOTE(review): as written this requires text to be exactly '#', in
       # which case the redirect pattern below cannot match; presumably a
       # test on the first character was intended -- confirm.
       if text and text == '#':
           mo = reredir.match(text)
           if mo:
               # NOTE(review): unbalanced "]" -- the subscript on Redirs is missing.
               Redirs] = mo.group(1)
               print "redirect: %s to %s" % (safe(title), safe(mo.group(1)))
           continue
       # look for templates
       if title.startswith('Template:') and text.find('{{context') >= 0:
           # but not the templates themselves!
           if title.find('/') >= 0: continue
           if title.find('context') >= 0: continue
           if title.find('checklabel') >= 0: continue
           if title.find('pos-') >= 0: continue
           # NOTE(review): 'Template:' is prepended to tname further down,
           # so the namespace prefix was presumably stripped here -- confirm.
           tname = title
           # NOTE(review): the next three lines rebind the dictionaries
           # themselves; keyed assignments were presumably intended.
           Templates = tname
           First = text.splitlines()
           Date = xmldate.enXMLdate # sans year
           print "template: ", safe(tname)
           spec = respec(First)
           # if spec is "bad" (contains { is a good indication) pick up current, also other stuff
           bad = False
           if '{' in spec or '}' in spec: bad = True
           if reiwiki.search(text): bad = True
           if '}}\n<noinc' in text: bad = True
           if bad:
               # keep the dump text so we can fall back to it if the live
               # fetch yields nothing
               oldtext = text
               print '    getting current version'
               try:
                   page = wikipedia.Page(site, 'Template:' + tname)
                   # text = page.get(sysop = True)  # for protected pages
                   text = getwikitext(page)
               except wikipedia.NoPage:
                   print "Can't get %s from en.wikt" % safe(page.aslink())
                   # NOTE(review): right-hand side missing (here and in the
                   # next handler); the "if not text" test below suggests an
                   # empty value was assigned -- confirm.
                   text = 
                   Date = "can't access"
               except wikipedia.IsRedirectPage, target:
                   print "Page %s is now a redirect" % safe(page.aslink())
                   text = 
                   # hard-coded date string for data taken from the live wiki
                   Date = '9 July'
                   First = u'redirect to ' + target
               if not text:
                   text = oldtext
               else:
                   # live text was fetched: re-derive first line and spec from it
                   First = text.splitlines()
                   Date = '9 July'
                   spec = respec(First)
           # NOTE(review): right-hand side missing; cats is extended with +=
           # and stripped as a string below.
           cats = 
           # extract tcat from spec
           mo = retcat.search(spec + '|')
           if mo:
                # NOTE(review): the replacement argument of sub() is missing.
                spec = retcat.sub(, spec + '|').strip('|')
                cats += mo.group(1).capitalize() + ' context labels, '
           # explicit cats
           for cat in recats.findall(text): cats += cat + ', '
           Cats = cats.strip(', ')
           if cats: print "    cats %s" % safe(Cats)
           # trouble
           # NOTE(review): right-hand side missing; Bad is extended with
           # string fragments below.
           Bad = 
           # look for iwikis, to report:
           for iw in reiwiki.findall(text):
               Bad += ', bad iwiki: ' + iw + ''
               print "    bad iwiki ", safe(iw)
           if '}}\n<noinc' in text: Bad += ', extra line break'
           Specs = spec
       # skip non main-name-space
       if title.find(':') >= 0:
           continue
       words += 1
       # if entries > 5000: break
       # parse text ...
       for ctxtup in recontext.findall(text):
           # NOTE(review): recontext has three groups, so findall yields
           # tuples and .split() below would fail; presumably one element
           # of the tuple was selected here -- confirm.
           ctxstr = ctxtup
           for ctx in ctxstr.split(','):
               ctx = ctx.strip("' ")
               if not ctx: continue
               # NOTE(review): the two conditions cannot both hold as
               # written ('w:' contains no '|'); subscripts/slices look lost.
               if ctx == 'w:' and ctx.find('|') > 0: ctx = ctx.split('|')
               # compares the whole string lexicographically against 'z' and 'A'
               if ctx > 'z' or ctx < 'A': continue
               # NOTE(review): Contexts and Examples are used as scalars in
               # both branches below; per-ctx subscripts were presumably
               # intended.
               if ctx in Contexts:
                   Contexts += 1
               else: 
                   Contexts = 1
                   print 'context: %s' % safe(ctx)
               if ctx in Examples:
                   # stop adding example titles once the text reaches 70 characters
                   if len(Examples) < 70: Examples += ' ' + title + ''
               else:
                   Examples = '' + title + ''
                   
               ctxs += 1
           # end of for context string
       # end of for entry
   print "%d entries, %d words, %d ctxs" % (entries, words, ctxs)
   # and write the AutoFormat control file
   # the existing page text is kept and the generated tables are appended to it
   try:
       reportpage = wikipedia.Page(site, 'User:AutoFormat/Contexts')
       oldreport = reportpage.get()
   except wikipedia.NoPage:
       print "No present report for %s" % reportpage.aslink()
       oldreport = "(edit above this line)\n----\n"
   report = oldreport + '----\n'
   report += '\nas of ' + xmldate.enXMLdate + ';\n'
   report += 'context templates, redirects, this section generated by bot, edit above horizontal rule\n'
   report += '\n{| border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse;"\n'
   report += '|-\n| | Context string\n| |Template name\n'
   for ctx in sorted(Templates):
       # NOTE(review): presumably a lookup keyed by ctx -- confirm.
       tname = Templates
       report += "|-\n| " + ctx + " ||" + tname + '\n'
   report += "|}\n\nRedirects:\n\n"
   report += '\n{| border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse;"\n'
   report += '|-\n| | Context string\n| |Redirect\n'
   print "WARNING: still writing redirects separately"
   for red in sorted(Redirs):
       # NOTE(review): .lower() is called on the Redirs dict itself here and
       # below; presumably a lookup keyed by red -- confirm.
       if Redirs.lower() in Templates:
            # redirect to a template, so valid
            # if just a case variant, ignore it, we match anyway
            if red.lower() == Redirs.lower(): continue
            report += "|-\n| " + red.lower() + " ||" + red + '\n'
   report += "|}\n"
   wikipedia.setAction('writing report')
   # file the report
   reportpage.put(report)
   # add redirs to Templates, set "first line" to be the redirect
   for red in Redirs:
       if Redirs.lower() in Templates:
            # NOTE(review): unbalanced "]" and two empty right-hand sides in
            # this block; subscripts and values are missing.
            Templates = Templates.lower()]
            First = u'redirect to ' + Redirs
            Specs = First
            Cats = 
            Date = xmldate.enXMLdate
            Bad = 
   # write context labels report
   report = 'Context label templates:\n\n'
   report += '* categories are those specified explicitly, including with tcat=, not the default cat\n\n'
   report += '\n{| border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse;"\n'
   report += '|-\n| |Template\n| width=10% |as of\n| |Specification\n| width=15% |Template category\n'
   # one row per template name, sorted case-insensitively
   for tname in sorted(First, key=unicode.lower):
       # NOTE(review): Date, Specs, Bad and Cats are concatenated directly;
       # presumably lookups keyed by tname -- confirm.
       report += '|-\n| ' + tname +' || ' + Date + ' || ' \
            + Specs + Bad + ' || ' + Cats + '\n'
       print "label %s: %s" % (safe(tname), safe(Specs))
   report += "|}\n"
   wikipedia.setAction('writing report')
   # write the report page
   # the fetched text is not used; the page is overwritten with the new report
   try:
       reportpage = wikipedia.Page(site, 'User:Robert Ullmann/Context labels')
       oldreport = reportpage.get()
   except wikipedia.NoPage:
       print "No present report for %s" % reportpage.aslink()
   # file the report
   reportpage.put(report)
   # write contexts report
   # minimum occurrence count for a context without a matching template to be listed
   thresh = 3
   report = '\nas of ' + xmldate.enXMLdate + '\n'
   report += 'Contexts given in definition lines without templates: '
   report += '\n%d different "contexts" found, report is those occuring at least %d times' % (len(Contexts), thresh)
   report += ' or that do have matching templates\n'
   report += '\n{| border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse;"\n'
   report += '|-\n| | Context\n| |Template\n| |Occurs\n| | Examples\n'
   for ctx in sorted(Contexts):
       # NOTE(review): strip('') removes nothing as written; the characters
       # to strip appear to be missing.
       tname = ctx.lower().strip('')
       if tname in Templates: tname = Templates
       # NOTE(review): right-hand side missing; "if not tname" below
       # suggests an empty value.
       else: tname = 
       if not tname and Contexts < thresh: continue
       report += "|-\n| " + ctx + " ||" + tname + ' ||' + str(Contexts) + '||' + Examples + '\n'
   report += "|}\n"
   wikipedia.setAction('writing report')
   # write the report page
   # as above, the fetched text is not used; the page is overwritten
   try:
       reportpage = wikipedia.Page(site, 'User:Robert Ullmann/Contexts')
       oldreport = reportpage.get()
   except wikipedia.NoPage:
       print "No present report for %s" % reportpage.aslink()
   # file the report
   reportpage.put(report)


if __name__ == "__main__":
    # Script entry point: run the report, and call wikipedia.stopme()
    # whether or not main() raises.
    try:
        main()
    finally:
        wikipedia.stopme()