User:Erutuon/scripts/modifyRussianTranslit.js

Hello, you have come here looking for the meaning of the word User:Erutuon/scripts/modifyRussianTranslit.js. In DICTIOUS you will not only get to know all the dictionary meanings for the word User:Erutuon/scripts/modifyRussianTranslit.js, but we will also tell you about its etymology, its characteristics and you will know how to say User:Erutuon/scripts/modifyRussianTranslit.js in singular and plural. Everything you need to know about the word User:Erutuon/scripts/modifyRussianTranslit.js you have here. The definition of the word User:Erutuon/scripts/modifyRussianTranslit.js will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of User:Erutuon/scripts/modifyRussianTranslit.js, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.
/* <nowiki>

This script finds all tagged Russian transliterations
on a page, and modifies them so that they show vowel reduction,
palatalization, and hard and soft postalveolars.
There will be numerous errors with the vowel reduction,
and so far assimilation in palatalization is not shown at all,
but at least this makes pronunciation easier to decode in most cases.

Examples:
- Воро́неж > Vorónež > Varón̦iž
- роди́тельный > rodítelʹnyj > rad̦íțil̦nyj
- шить > šitʹ > šyț
- щёлочь > ščóločʹ > śśólać
- хоро́ший > xoróšij > xaróšyj

to do:
- don't reduce vowels of all monosyllables

*/
/* globals $ */

"use strict";

// jQuery collection of every element matched by the CSS selector
// :lang(ru-Latn); these are the elements the script rewrites further down.
var RussianTranslit = $(":lang(ru-Latn)");

/*
	Combining marks and modifier letters used throughout the script.
	They are declared before the consonant patterns because those patterns
	interpolate `comma`; declaring it afterwards left `comma` undefined at the
	point of use, which put the text "undefined" into the pattern.
*/
var u = String.fromCodePoint;
var acute = u(0x301);
var grave = u(0x300);
var caron = u(0x30C);
var comma = u(0x326);
var prime = "ʹ";
var doublePrime = "ʺ";
var dottedCircle = u(0x25CC);

/*
	Obstruent tables. Sounds at the same position in the two arrays form a
	voiced/voiceless pair.

	NOTE(review): the array literals were missing from this copy of the script
	(every bracketed span has been stripped), so the six pairs below are a
	reconstruction, not the author's data. The original evidently also held
	entries longer than one character (see the length check below). Confirm
	against the upstream script before relying on these tables.
*/
var voiced =	["b", "d", "g", "v", "z", "\u017E"];    // ..., ž
var voiceless =	["p", "t", "k", "f", "s", "\u0161"];    // ..., š

// sound -> position in its table (the same position in the other table is its pair).
var getIndex = {};
// sound -> "voiced" | "voiceless".
var getType = {};
// All single-character sounds, concatenated for use inside a character class.
var consonants = "";

[ [ voiced, "voiced" ], [ voiceless, "voiceless" ] ].forEach(
	function (table) {
		var sounds = table[0];
		var type = table[1];
		sounds.forEach(
			function (sound, index) {
				getIndex[sound] = index;
				getType[sound] = type;
				// Only single characters can be listed in a character class.
				if ( sound.length === 1 ) {
					consonants += sound;
				}
			}
		);
	}
);

// One obstruent, optionally followed by the palatalization mark (comma below).
var consonant = "[" + consonants + "]" + comma + "?";
// var consonantRegex = new RegExp(consonant, "g");
// Two or more obstruents in a row.
var consonantSequence = new RegExp(consonant + "(?:" + consonant + ")+", "g");

/*
	Consonants that can take the palatalization mark.
	NOTE(review): the character class was stripped from this copy; the set
	below is reconstructed from the Russian consonants б в г д з к л м н п р с
	т ф х in this transliteration. Confirm against the upstream script.
*/
var palatalizable = "[bvgdzklmnprstfx]";

/*
	Compiled patterns, keyed by pattern source plus a one-character prefix
	recording whether the tag-aware form was built. A Map is used so that
	pattern sources such as "constructor" cannot collide with inherited
	object properties.
*/
var regexCache = new Map();

/*
	Replace every match of a pattern in a string, compiling each pattern once.

	text:        string to search.
	regex:       pattern source (a string, not a RegExp); compiled with the
	             "g" flag.
	replacement: replacement string, with the usual $n substitutions.

	When both `regex` and `replacement` contain "#" (the word-boundary
	marker), the first "#" in the pattern is widened to also capture an HTML
	tag directly before it, and the first "#" in the replacement becomes $1,
	so that tag is kept in the output. This assumes the pattern has no other
	capture group before "#".

	Returns the new string.
*/
var replace = function (text, regex, replacement)
{
	var tagAware = regex.includes("#") && replacement.includes("#");
	var key = ( tagAware ? "#" : " " ) + regex;

	var compiled = regexCache.get(key);
	if ( !compiled ) {
		compiled = new RegExp(
			tagAware ? regex.replace("#", "((?:<[^>]+>)?#)") : regex,
			"g"
		);
		regexCache.set(key, compiled);
	}

	// The replacement is rewritten on every call, not only when the pattern
	// is first compiled; otherwise a cached tag-aware pattern would be paired
	// with a replacement that drops the captured tag. "$$1" yields a literal
	// "$1" in the result.
	return text.replace(
		compiled,
		tagAware ? replacement.replace("#", "$$1") : replacement
	);
};

/*
	Rewrite the HTML of every matched transliteration so that it shows vowel
	reduction, palatalization and hard/soft postalveolars.

	The text is processed in decomposed form (NFD) with "#" standing in for
	word boundaries. The steps below depend on their order.

	NOTE(review): this copy of the script had every bracketed span stripped
	(array literals, subscripts, regex character classes). Spans whose content
	follows from the surrounding code have been restored; character classes
	that could only be inferred from the comments are marked NOTE(review) and
	should be checked against the upstream script.

	NOTE(review): HTML tags and entities are only protected shortly before the
	voicing step, so the earlier vowel and palatalization rules also run over
	tag and attribute text. That order is kept as found.
*/
RussianTranslit.each(
	function ()
	{
		var $this = $(this);
		var innerHTML = $this.html();
		var decomposed = innerHTML.normalize("NFD");

		var classAttr = $this.attr("class");
		var isHeadword = Boolean(classAttr) && classAttr.includes("headword-tr");

		var isAffix = /^-/.test(innerHTML) || /-$/.test(innerHTML);

		// Mark word boundaries with #.
		decomposed = "#" + decomposed + "#";
		decomposed = replace(decomposed, " ", "#");

		/*	-т(ь)ся is pronounced like -тса, at least in reflexive verbs.
			(The links to example entries were lost from this comment.)
		*/
		decomposed = replace(decomposed, "t" + prime + "?sja#", "tsa#");

		/*	Remove prime from c, which is rarely palatalized,
			and š, ž, which are never palatalized.
			Every remaining prime becomes the comma-below mark.
		*/
		decomposed = replace(decomposed, caron + prime, caron);
		decomposed = replace(decomposed, "c" + prime, "c");
		decomposed = replace(decomposed, prime + "o", comma + "jo");
		decomposed = replace(decomposed, prime, comma);

		/*
			Reduce unstressed е, о, я (e, o, a) to i or y, a, i or y,
			except for final -е and -я. -е is pronounced as e or a, but
			unfortunately this depends on the part of speech, which
			JavaScript has no way to determine.

			Further exceptions (the entry links were lost from this comment):
				- second-person plural indicative
				- second-person plural imperative

			Only done when the text carries a stress mark, or for headword
			affixes.
		*/
		if ( decomposed.includes(acute) || ( isHeadword && isAffix ) )
		{
			// NOTE(review): the original lookahead had a second alternative,
			// a character class that was stripped; only "#" is certain.
			var wordBoundary = "(?=#)";
			// NOTE(review): reconstructed; "not followed by a stress mark".
			// `grave` is assumed to belong here because nothing else uses it.
			var notAccent = "(?=[^" + acute + grave + "])";

			// Shield final -je / -ja from the reductions below.
			decomposed = replace(decomposed, "je" + wordBoundary, "%%");
			decomposed = replace(decomposed, "ja" + wordBoundary, "&&");

			// NOTE(review): the group content was stripped. Reconstructed as
			// c, š, ž, the same consonants after which i becomes y below.
			decomposed = replace(decomposed, "(c|[sz]" + caron + ")e" + notAccent, "$1y");
			decomposed = replace(decomposed, "(c|[sz]" + caron + ")E" + notAccent, "$1Y");
			// NOTE(review): the text always starts with "#" at this point and
			// the patterns are compiled without the "m" flag, so these two
			// "^" rules can never match. Left as found; "#e" may have been
			// intended.
			decomposed = replace(decomposed, "^e" + notAccent, "y");
			decomposed = replace(decomposed, "^E" + notAccent, "Y");

			decomposed = replace(decomposed, "e" + notAccent, "i");
			decomposed = replace(decomposed, "E" + notAccent, "I");

			decomposed = replace(decomposed, "o" + notAccent, "a");
			decomposed = replace(decomposed, "O" + notAccent, "A");

			decomposed = replace(decomposed, "ja" + notAccent, "ji");
			decomposed = replace(decomposed, "Ja" + notAccent, "Ji");

			// Put the shielded endings back.
			decomposed = replace(decomposed, "%%", "je");
			decomposed = replace(decomposed, "&&", "ja");
		}

		// и (i) is pronounced like ы (y) after ц, ш, ж (c, š, ž).
		// NOTE(review): "[sz]" is reconstructed from the comment above; without
		// it the rule would also apply after č.
		decomposed = replace(decomposed, "([sz]" + caron + ")i", "$1y");
		decomposed = replace(decomposed, "ci", "cy");

		// Change č, šč to ć, śś to indicate that they are soft.
		decomposed = replace(decomposed, "s" + caron + "c" + caron, "śś");
		decomposed = replace(decomposed, "c" + caron, "ć");

		// Mark palatalization with comma below: b̦, v̦, g̦, ... .
		// NOTE(review): the vowel class "[ei]" is reconstructed; it matches
		// the examples in the file header (n̦i, d̦í, țil̦).
		decomposed = replace(decomposed, "(" + palatalizable + ")([ei])", "$1" + comma + "$2");
		decomposed = replace(decomposed, "(" + palatalizable + ")j", "$1" + comma);

		// Epsilon (representing uniotated е). Probably represents y when unstressed.
		decomposed = replace(decomposed, "ɛ", "e");

		// Escape HTML tags and entities: each one is swapped for "%N;" so the
		// voicing step below cannot touch it. The ";" terminator keeps "%1"
		// distinct from "%10" once there are more than ten placeholders.
		var escaped = [];
		decomposed = decomposed.replace(
			/<[^>]+>|&[^;]+;/g,
			function (tag) {
				escaped.push(tag);
				return "%" + ( escaped.length - 1 ) + ";";
			}
		);

		// Show voicing assimilation.

		decomposed = decomposed.normalize("NFC");
		// Decompose ș, ț again so that they read as s, t plus the comma mark,
		// which is the shape the consonant pattern expects.
		decomposed = decomposed.replace(
			/[\u0219\u021B]/g,
			function (letter) {
				return letter.normalize("NFD");
			}
		);

		// Process sequences of obstruents. For now they are only highlighted.
		decomposed = decomposed.replace(
			consonantSequence,
			function (sequence) {
				return "<span style=\"color: red;\">" + sequence + "</span>";
			}
		);

		// Put the escaped tags and entities back. Anything that merely looks
		// like a placeholder is left untouched.
		decomposed = decomposed.replace(
			/%(\d+);/g,
			function (wholematch, number)
			{
				var tag = escaped[Number(number)];
				return tag === undefined ? wholematch : tag;
			}
		);

		decomposed = replace(decomposed, doublePrime, "");

		// Remove #: drop the two outer markers, then turn the inner ones back
		// into spaces. Anchoring avoids eating inner spaces when the HTML
		// contains a line break.
		decomposed = decomposed.replace(/^#|#$/g, "");
		decomposed = replace(decomposed, "#", " ");

		$this.html(decomposed);
	}
);
// </nowiki>