User:Robert Ullmann/code/level3

Hello, you have come here looking for the meaning of the word User:Robert Ullmann/code/level3. In DICTIOUS you will not only get to know all the dictionary meanings for the word User:Robert Ullmann/code/level3, but we will also tell you about its etymology, its characteristics and you will know how to say User:Robert Ullmann/code/level3 in singular and plural. Everything you need to know about the word User:Robert Ullmann/code/level3 you have here. The definition of the word User:Robert Ullmann/code/level3 will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of User:Robert Ullmann/code/level3, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.



#!/usr/bin/python
# -*- coding: utf-8  -*-
# wikipath en wiktionary User:Robert Ullmann/code/level3


"""
This code looks for valid and invalid L3 headers in the en.wikt

No command line arguments.

writes reports
"""

import wikipedia
import xmlreader
import sys
import re
import pickle
import xmldate

def safe(s):
    """Return the pickled representation of ``s``.

    The object is serialized with ``pickle.dumps`` using the default
    protocol and the result is returned unchanged.

    NOTE(review): the previous body also computed ``len()`` of the pickle
    without using it, which suggests a slice of the pickled data was once
    returned instead of the whole thing -- confirm against the upstream
    script before relying on the exact output format.
    """
    return pickle.dumps(s)

# Drops the arguments of a header template: '{{name|args}}' -> '{{name}}'.
_TEMPLATE_ARGS_RE = re.compile(r'(.*?)\|.*?\}(.*)')

# Wiki markup that opens both report tables.
_TABLE_OPEN = ('\n{| border="1" cellpadding="4" cellspacing="0" '
               'style="border-collapse: collapse;"\n')


def _valid_header_notes():
    """Return a dict mapping every recognized L3 header to its report note."""
    notes = {}

    groups = (
        # standard POS, etc:
        (('Noun', 'Verb', 'Adjective', 'Adverb', 'Pronoun',
          'Proper noun', 'Preposition', 'Conjunction', 'Interjection',
          'Article', 'Prefix', 'Suffix', 'Affix', 'Infix', 'Counter'),
         'standard POS header'),
        (('Initialism', 'Abbreviation', 'Letter', 'Symbol', 'Acronym',
          'Proverb', 'Contraction', 'Idiom', 'Phrase', 'Syllable'),
         'standard non-POS header'),
        (('Number', 'Numeral', 'Cardinal number', 'Cardinal numeral',
          'Ordinal number', 'Ordinal numeral'),
         "see note ''supra''"),
        (('Etymology', 'Pronunciation', 'Trivia', 'Alternative spellings',
          'Alternative forms', 'Anagrams', 'Usage notes'),
         "standard L3 header"),
        (('Related terms', 'Derived terms', 'Descendants', 'See also',
          'References', 'External links', 'Quotations'),
         "standard L4/L3 header"),
        (('Declension', 'Conjugation', 'Inflection', 'Antonyms', 'Synonyms',
          'Translations'),
         "header should be at L4"),
        (('Han character', 'Kanji', 'Hanzi', 'Hanja'),
         "valid in single Han character entries only, ''not checked''"),
        (('{{abbreviation}}', '{{acronym}}', '{{initialism}}', '{{numeral}}'),
         "L3 POS header templates"),
    )
    for headers, note in groups:
        for header in headers:
            notes[header] = note

    # NOTE(review): the dictionary key of this loop was lost from the source;
    # numbered etymology sections ("Etymology 1", ...) are assumed -- confirm.
    for num in range(0, 25):
        notes['Etymology ' + str(num)] = 'standard L3 header'

    # NOTE(review): the key was lost here as well; 'Syllable' is assumed from
    # the note text.  It deliberately replaces the generic non-POS note above.
    notes['Syllable'] = "valid only for single syllable entries, ''not checked''"

    return notes


def _l3_header(line):
    """Return the header text if ``line`` is a level-3 header, else None."""
    # comments on the (presumed) end of lines
    if '<!--' in line:
        line = line.split('<!--')[0]

    # must open with exactly three '=' (a fourth means L4 or deeper)
    if line[0:3] != '===':
        return None
    if line[3:4] == '=':
        return None

    # Cut the leading '===' and the same width off the end; a malformed
    # closing run is intentionally left in the text so that it is reported.
    # NOTE(review): slice bounds reconstructed, confirm against upstream.
    header = line[3:].strip()[:-3].strip()

    # template mess: count '{{x|...}}' headers under plain '{{x}}'
    if header[0:2] == '{{':
        header = _TEMPLATE_ARGS_RE.sub(r'\1}\2', header)

    return header


def _save_report(site, page_title, report):
    """Write ``report`` to the wiki page ``page_title``, replacing its text."""
    wikipedia.setAction('writing report')

    reportpage = wikipedia.Page(site, page_title)
    try:
        # only fetched to find out whether the page already exists
        reportpage.get()
    except wikipedia.NoPage:
        print("No present report for %s" % reportpage.aslink())

    # file the report
    reportpage.put(report)


def main():
    """Scan the en.wikt XML dump for L3 headers and write the two reports.

    Every line of every main-namespace entry in "en-wikt.xml" is checked for
    a level-3 header.  Occurrences are counted per header text; headers
    without a note from ``_valid_header_notes()`` are treated as invalid and
    collect links to example entries.  The results are written to
    'User:Robert Ullmann/L3/valid' and 'User:Robert Ullmann/L3/invalid'.
    """
    # make sure we are logged in
    site = wikipedia.getSite()
    site.forceLogin()
    wikipedia.setAction('writing report')

    # get XML dump
    dump = xmlreader.XmlDump("en-wikt.xml")

    entries = 0
    words = 0
    l3_count = 0

    # valid headers have notes
    notes = _valid_header_notes()
    # all headers have occurrence counts
    occurs = {}
    # invalid headers have examples
    examples = {}

    for entry in dump.parse():
        text = entry.text
        title = entry.title

        entries += 1
        if entries % 10000 == 0:
            print("%d entries, %d words, %d L3 headers"
                  % (entries, words, l3_count))

        # skip non main-name-space
        if ':' in title or '/' in title:
            continue
        words += 1

        if title.startswith('Glossary of'):
            continue

        # parse text ...
        for line in text.splitlines():
            header = _l3_header(line)
            if header is None:
                continue

            l3_count += 1
            occurs[header] = occurs.get(header, 0) + 1

            if header in notes:
                continue

            link = '[[' + title + ']]'
            if header not in examples:
                examples[header] = link
            elif len(examples[header]) < 210:
                # stop adding examples once the cell is long enough
                examples[header] += ' ' + link

    print("%d entries, %d words, %d L3 headers" % (entries, words, l3_count))

    stamp = '\nas of ' + xmldate.enXMLdate + '\n'
    ordered = sorted(occurs)

    # report valid headers
    parts = [stamp, _TABLE_OPEN, '|-\n| | Header\n| |Occurs\n| |Notes\n']
    for header in ordered:
        if header not in notes:
            continue
        parts.append("|-\n| '''<nowiki>" + header + "</nowiki>''' ||"
                     + str(occurs[header]) + '||' + notes[header] + '\n')
    parts.append("|}\n")
    _save_report(site, 'User:Robert Ullmann/L3/valid', ''.join(parts))

    # report invalid headers
    distinct = 0
    total = 0
    parts = [stamp, _TABLE_OPEN, '|-\n| | Header\n| |Occurs\n| |Examples\n']
    for header in ordered:
        if header in notes:
            continue
        parts.append("|-\n| " + header + " ||" + str(occurs[header])
                     + '||' + examples[header] + '\n')
        distinct += 1
        total += occurs[header]
    parts.append("|}\n\n")
    parts.append("* Number of distinct invalid headers: %d\n" % distinct)
    parts.append("* Total number of invalid headers: %d\n" % total)
    _save_report(site, 'User:Robert Ullmann/L3/invalid', ''.join(parts))


if __name__ == "__main__":

    # Run the report; wikipedia.stopme() is called even if main() raises.
    try:
        main()
    finally:
        wikipedia.stopme()