User:Tbot/code/script

Hello, you have come here looking for the meaning of the word User:Tbot/code/script. In DICTIOUS you will not only get to know all the dictionary meanings for the word User:Tbot/code/script, but we will also tell you about its etymology, its characteristics and you will know how to say User:Tbot/code/script in singular and plural. Everything you need to know about the word User:Tbot/code/script you have here. The definition of the word User:Tbot/code/script will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of User:Tbot/code/script, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.



#!/usr/bin/python
# -*- coding: utf-8  -*-
# wikipath en wiktionary User:Tbot/code/script


"""
Given a word and a language code, return the name of the script template to use for it on the English Wiktionary (en.wikt).

"""

# Script range table.  Each entry is a tuple of
#   (lowest character code point, highest code point + 1, ISO script code)
# so a code point c belongs to an entry when low <= c < high.

Scs = [
    (0x0370, 0x0400, 'Grek'),
    (0x0400, 0x0530, 'Cyrl'),
    (0x0530, 0x0590, 'Armn'),
    (0x0590, 0x0600, 'Hebr'),
    (0x0600, 0x0700, 'Arab'),
    (0x0700, 0x0750, 'Syrc'),
    (0x0750, 0x0780, 'Arab'),

    (0x0900, 0x0980, 'Deva'),
    (0x0980, 0x0A00, 'Beng'),
    (0x0A00, 0x0A80, 'Guru'),
    (0x0A80, 0x0B00, 'Gujr'),
    (0x0B00, 0x0B80, 'Orya'),
    (0x0B80, 0x0C00, 'Taml'),
    (0x0C00, 0x0C80, 'Telu'),
    (0x0C80, 0x0D00, 'Knda'),
    (0x0D00, 0x0D80, 'Mlym'),
    (0x0D80, 0x0E00, 'Sinh'),

    (0x0E00, 0x0E80, 'Thai'),
    (0x0E80, 0x0F00, 'Laoo'),
    (0x0F00, 0x1000, 'Tibt'),
    (0x1000, 0x10A0, 'Mymr'),
    (0x10A0, 0x1100, 'Geor'),
    (0x1100, 0x1200, 'Hang'),    # jamo
    (0x1200, 0x13A0, 'Ethi'),
    (0x13A0, 0x1400, 'Cher'),
    (0x1400, 0x1680, 'Cans'),

    (0x3040, 0x3100, 'Jpan'),
    (0x3400, 0xA000, 'Hani'),    # Han Extension A and the unified ideographs
    (0xAC00, 0xD800, 'Hang'),
    # Han Extension B is mostly archaic, so traditional script is assumed
    (0x20000, 0x2A6D7, 'Hant'),
]


# Language-specific overrides: when '<language code>-<ISO script code>' is a
# key here, the mapped template is used in place of the bare script code.

Lsp = {
    'fa-Arab': 'fa-Arab',
    'ur-Arab': 'ur-Arab',
    'pa-Arab': 'pa-Arab',
    'ku-Arab': 'ku-Arab',
    'grc-Grek': 'polytonic',
    'ja-Hani': 'Jpan',
    'ja-Hant': 'Jpan',
}
# more combinations still need to be added ...

# Every script template name that is accepted.  Redirects are listed under
# their own names; nothing here maps them to a canonical template.
# NOTE(review): 'Sryc' looks like a transposition of 'Syrc' -- confirm whether
# it is an intentional redirect name before changing it.
Scripts = set([
    'ARchar', 'FAchar', 'JAchar', 'KUchar', 'RUchar', 'THchar', 'URchar',
    'Hayeren', 'polytonic',
    'Arab', 'fa-Arab', 'ku-Arab', 'pa-Arab', 'ur-Arab',
    'Beng', 'Cyrl', 'Deva', 'Grek', 'Hang', 'Hani', 'Hant', 'Hebr', 'Jpan',
    'Sryc',
])
# Every script code used by the range table must be recognized as well.
for low, high, scode in Scs:
    Scripts.add(scode)

def script(word, lc, report = False):
    """
    Return the script template name for word in language lc.

    Only the first character of word is examined.  Its code point is looked
    up in the Scs range table; the ISO script code found there is replaced by
    a language-specific template when lc + '-' + code is a key of Lsp.

    word   -- text to classify; only its first character (or first UTF-16
              surrogate pair) is used
    lc     -- language code, combined with the script code to look up Lsp
    report -- when true, print a diagnostic if a code point above 0x0370
              matches no range in Scs

    Returns the template name, or '' when word is empty, starts with an
    incomplete or malformed surrogate pair, or matches no range in Scs.
    """

    if not word: return ''

    a = ord(word[0])
    if 0xd800 <= a < 0xdc00:
        # High surrogate: the string holds a non-BMP character as a UTF-16
        # pair, so combine it with the following low surrogate.
        if len(word) < 2: return ''
        b = ord(word[1])
        # A high surrogate not followed by a low one is malformed text.
        if not (0xdc00 <= b < 0xe000): return ''
        a = (a - 0xd800) * 1024 + (b - 0xdc00) + 0x10000

    sc = ''
    for low, high, scode in Scs:
        if low <= a < high:
            sc = scode
            break

    # Swap in the language-specific template, if this pairing has one.
    if sc:
        sc = Lsp.get(lc + '-' + sc, sc)

    # The parenthesized single-argument form prints the same text on
    # Python 2 and Python 3.
    if report and not sc and a > 0x0370:
        print("no match for script for char code %x" % a)

    return sc

def scriptp(sc):
    """Return True when sc is a member of the Scripts set, False otherwise."""

    return sc in Scripts