#!/usr/bin/awk -f
#
# Sorts the entries of wiki translation tables and re-splits them into two
# columns.
#
# Input lines outside a {{ylä}} ... {{ala}} region are copied through
# unchanged.  Inside a region, every "*..." line is collected; when {{ala}} is
# reached the collected entries are printed in sorted key order, with a
# {{keski}} separator inserted near the midpoint (never in the middle of an
# entry's sub-lines).
#
# Requires gawk: asorti() is a gawk extension.
#
# NOTE(review): the copy of this script that was reviewed had lost every
# "[...]" span (array subscripts and regex bracket expressions).  The
# subscripts and bracket expressions below are reconstructed from context and
# must be confirmed against a pristine copy:
#   - assoc[] keys are inferred from the order of the names, which follows
#     the alphabetical ISO 639-1 code list followed by ISO 639-2/3 codes;
#   - the sub-entry pattern /^\*[:*]/, the key pattern /^\*[^:]+/ and the
#     blank-line pattern /^[ \t]*$/ are best guesses;
#   - the composite key used for sub-lines is an implementation choice.

# Clears all per-table state so the next {{ylä}} block starts fresh.
function reset() {
    krivi = -1;
    sisalla = 0;
    avain = "";          # otherwise the orphan sub-entry check only works once
    delete kohdat;
    delete pituudet;     # group lengths belong to the table just printed
}

# Turns the text between the leading "*" and the ":" into a sort key.
#   txt  - raw text, e.g. " {{en}} " or "englanti"
# Returns the language name from assoc[] when txt contains a {{code}}
# template ("" if the code is unknown), otherwise the trimmed text itself.
# "code" is a local variable.
function hae_avain(txt,    code) {
    gsub("^ *", "", txt);
    gsub(" *$", "", txt);
    if ( match(txt, /\{\{.*\}\}/) ) {
        code = substr(txt, RSTART + 2, RLENGTH - 4);
        # "in" avoids creating an empty element for unknown codes.
        return (code in assoc) ? assoc[code] : "";
    }
    return txt;
}

BEGIN {
    sisalla = 0;  # 1 while between the {{ylä}} and {{ala}} tags
    krivi = -1;   # running sub-line number within the current entry

    # Language code -> Finnish language name.
    # NOTE(review): keys reconstructed (see file header) - confirm.
    assoc["aa"] = "afar";          assoc["ab"] = "abhaasi";       assoc["ae"] = "avesta";
    assoc["af"] = "afrikaans";     assoc["ak"] = "akan";          assoc["am"] = "amhara";
    assoc["an"] = "aragonia";      assoc["ar"] = "arabia";        assoc["as"] = "assami";
    assoc["av"] = "avaari";        assoc["ay"] = "aimara";        assoc["az"] = "azeri";
    assoc["ba"] = "baškiiri";      assoc["be"] = "valkovenäjä";   assoc["bg"] = "bulgaria";
    assoc["bh"] = "bihari";        assoc["bi"] = "bislama";       assoc["bm"] = "bambara";
    assoc["bn"] = "bengali";       assoc["bo"] = "tiibet";        assoc["br"] = "bretoni";
    assoc["bs"] = "bosnia";        assoc["ca"] = "katalaani";     assoc["ce"] = "tšetšeeni";
    assoc["ch"] = "chamorro";      assoc["co"] = "korsika";       assoc["cr"] = "cree";
    assoc["cs"] = "tšekki";        assoc["cu"] = "kirkkoslaavi";  assoc["cv"] = "tšuvassi";
    assoc["cy"] = "kymri";         assoc["da"] = "tanska";        assoc["de"] = "saksa";
    assoc["dv"] = "divehi";        assoc["dz"] = "bhutani";       assoc["ee"] = "ewe";
    assoc["el"] = "kreikka";       assoc["en"] = "englanti";      assoc["eo"] = "esperanto";
    assoc["es"] = "espanja";       assoc["et"] = "viro";          assoc["eu"] = "baski";
    assoc["fa"] = "persia";        assoc["ff"] = "fulfulde";      assoc["fi"] = "suomi";
    assoc["fj"] = "fidži";         assoc["fo"] = "fääri";         assoc["fr"] = "ranska";
    assoc["fy"] = "friisi";        assoc["ga"] = "iiri";          assoc["gd"] = "skotti";
    assoc["gl"] = "galicia";       assoc["gn"] = "guarani";       assoc["gu"] = "gudžarati";
    assoc["gv"] = "manx";          assoc["ha"] = "hausa";         assoc["he"] = "heprea";
    assoc["hi"] = "hindi";         assoc["ho"] = "hiri-motu";     assoc["hr"] = "kroatia";
    assoc["ht"] = "haiti";         assoc["hu"] = "unkari";        assoc["hy"] = "armenia";
    assoc["hz"] = "herero";        assoc["ia"] = "interlingua";   assoc["id"] = "indonesia";
    assoc["ie"] = "interlingua";   assoc["ig"] = "igbo";          assoc["ii"] = "pohjois-ji";
    assoc["ik"] = "inupiak";       assoc["io"] = "ido";           assoc["is"] = "islanti";
    assoc["it"] = "italia";        assoc["iu"] = "eskimo";        assoc["ja"] = "japani";
    assoc["jv"] = "jaava";         assoc["ka"] = "gruusia";       assoc["kg"] = "kongo";
    assoc["ki"] = "kikuju";        assoc["kj"] = "kuanjama";      assoc["kk"] = "kazakki";
    assoc["kl"] = "grönlanti";     assoc["km"] = "khmer";         assoc["kn"] = "kannada";
    assoc["ko"] = "korea";         assoc["kr"] = "kanuri";        assoc["ks"] = "kašmiri";
    assoc["ku"] = "kurdi";         assoc["kv"] = "komi";          assoc["kw"] = "korni";
    assoc["ky"] = "kirgiisi";      assoc["la"] = "latina";        assoc["lb"] = "letzeburg";
    assoc["lg"] = "ganda";         assoc["li"] = "limburgi";      assoc["ln"] = "lingala";
    assoc["lo"] = "lao";           assoc["lt"] = "liettua";       assoc["lu"] = "luba";
    assoc["lv"] = "latvia";        assoc["mg"] = "malagasi";      assoc["mh"] = "marshalli";
    assoc["mi"] = "maori";         assoc["mk"] = "makedonia";     assoc["ml"] = "malajalam";
    assoc["mn"] = "mongoli";       assoc["mo"] = "moldavia";      assoc["mr"] = "marathi";
    assoc["ms"] = "malaiji";       assoc["mt"] = "malta";         assoc["my"] = "burma";
    assoc["na"] = "nauru";         assoc["nb"] = "kirjanorja";    assoc["nd"] = "pohjoisndebel";
    assoc["ne"] = "nepali";        assoc["ng"] = "ambo";          assoc["nl"] = "hollanti";
    assoc["nn"] = "uusnorja";      assoc["no"] = "norja";         assoc["nr"] = "eteländebel";
    assoc["nv"] = "navaho";        assoc["ny"] = "njandža";       assoc["oc"] = "oksitaani";
    assoc["oj"] = "ojibwa";        assoc["om"] = "galla";         assoc["or"] = "orija";
    assoc["os"] = "osseetti";      assoc["pa"] = "pandžabi";      assoc["pi"] = "pali";
    assoc["pl"] = "puola";         assoc["ps"] = "afgaani";       assoc["pt"] = "portugali";
    assoc["qu"] = "ketšua";        assoc["rm"] = "retoromaani";   assoc["rn"] = "rundi";
    assoc["ro"] = "romania";       assoc["ru"] = "venäjä";        assoc["rw"] = "ruanda";
    assoc["sa"] = "sanskrit";      assoc["sc"] = "sardi";         assoc["sd"] = "sindhi";
    assoc["se"] = "pohjoissaame";  assoc["sg"] = "sango";         assoc["si"] = "singali";
    assoc["sk"] = "slovakki";      assoc["sl"] = "sloveeni";      assoc["sm"] = "samoa";
    assoc["sn"] = "shona";         assoc["so"] = "somali";        assoc["sq"] = "albania";
    assoc["sr"] = "serbia";        assoc["ss"] = "siswati";       assoc["st"] = "sotho";
    assoc["su"] = "sunda";         assoc["sv"] = "ruotsi";        assoc["sw"] = "suahili";
    assoc["ta"] = "tamili";        assoc["te"] = "telugu";        assoc["tg"] = "tadžikki";
    assoc["th"] = "thai";          assoc["ti"] = "tigrinja";      assoc["tk"] = "turkmeeni";
    assoc["tl"] = "tagalog";       assoc["tn"] = "tšwana";        assoc["to"] = "tonga";
    assoc["tr"] = "turkki";        assoc["ts"] = "tsonga";        assoc["tt"] = "tataari";
    assoc["tw"] = "twi";           assoc["ty"] = "tahiti";        assoc["ug"] = "uiguuri";
    assoc["uk"] = "ukraina";       assoc["ur"] = "urdu";          assoc["uz"] = "uzbekki";
    assoc["ve"] = "venda";         assoc["vi"] = "vietnam";       assoc["vo"] = "volapük";
    assoc["wa"] = "valloni";       assoc["wo"] = "wolof";         assoc["xh"] = "xhosa";
    assoc["yi"] = "jiddi";         assoc["yo"] = "joruba";        assoc["za"] = "tšuang";
    assoc["zh"] = "kiina";         assoc["zu"] = "zulu";

    # Three-letter codes.
    assoc["ang"] = "muinaisenglanti";  assoc["chm"] = "mari";
    assoc["enm"] = "keskienglanti";    assoc["frm"] = "keskiranska";
    assoc["fro"] = "muinaisranska";    assoc["gmh"] = "keskiyläsaksa";
    assoc["goh"] = "muinaisyläsaksa";  assoc["grc"] = "muinaiskreikka";
    assoc["izh"] = "inkeroinen";       assoc["mdf"] = "mokša";
    assoc["myv"] = "ersä";             assoc["nds"] = "alasaksa";
    assoc["sel"] = "selkuppi";         assoc["sma"] = "eteläsaame";
    assoc["smj"] = "luulajansaame";    assoc["smn"] = "inarinsaame";
    assoc["sms"] = "koltansaame";      assoc["tvl"] = "tuvalu";
    assoc["udm"] = "udmurtti";         assoc["vep"] = "vepsä";
    assoc["vot"] = "vatja";
}

# Start of a table: begin collecting.  The tag itself is re-emitted when the
# table is printed at {{ala}}.  Braces are escaped so they can never be parsed
# as a regex interval expression.
/^\{\{ylä\}\}/ {
    sisalla = 1;
    next;
}

# The old column separator is dropped; a new one is computed below.
/^\{\{keski\}\}/ {
    next;
}

# End of a table: print the collected lines sorted by key, in two columns.
/^\{\{ala\}\}/ {
    sisalla = 0;
    n = asorti(kohdat, ind);     # ind[1..n] = keys of kohdat in sorted order
    n2 = int(n/2 + 0.5);         # target length of the first column
    pituus = 0;

    print "{{ylä}}"
    ci = 1;                      # 1-based position within the current column
    for (i = 1; i <= n; i++) {
        # Only main entries have a pituudet[] element; it holds the number of
        # lines in the entry's group (main line plus its sub-lines).
        if ( ind[i] in pituudet ) {
            pituus = pituudet[ind[i]];
            a = ci - 1;              # lines already printed in this column
            l = n - a - pituus;      # lines remaining after this group
            if ( a + pituus > n2 ) {
                # The group would straddle the midpoint: keep it whole on
                # whichever side leaves the columns better balanced.
                if ( a <= l ) {
                    n2 = a + pituus;
                } else {
                    print "{{keski}}"
                    ci = 1;
                    n2 = n;          # second column: no further splitting
                }
            }
        }
        print kohdat[ind[i]]
        ci++;
    }
    print "{{ala}}"

    reset();
    next;
}

# Sub-entry: a continuation line that belongs to the preceding main entry.
# NOTE(review): the bracket expression [:*] is reconstructed - confirm.
sisalla == 1 && /^\*[:*]/ {
    if (avain == "") {
        print "VIRHE: " NR ": Yksittäinen alikohta";
    }
    # SUBSEP sorts before any printable character, so the sub-lines of an
    # entry stay directly after its main line; the zero-padded counter keeps
    # them in input order under string comparison.
    kohdat[avain SUBSEP sprintf("%04d", ++krivi)] = $0;
    pituudet[avain]++;
    next;
}

# Main entry: any other line starting with "*".
sisalla == 1 && /^\*/ {
    # NOTE(review): [^:] is reconstructed - the key is taken to be the text
    # between the leading "*" and the first ":".
    if ( match($0, /^\*[^:]+/) ) {
        krivi = 0
        vali = substr($0, 2, RLENGTH - 1);
        avain = hae_avain(vali);
        if ( avain != "" ) {
            kohdat[avain] = $0;
            pituudet[avain] = 1;
        } else
            print "VIRHE: " NR ": Tuntematon malline: " vali
    } else {
        print "VIRHE: " NR ": Omituinen muoto: " $0;
    }
    next;
}

# Blank lines inside a table are skipped.
# NOTE(review): [ \t] is reconstructed - confirm.
sisalla == 1 && /^[ \t]*$/ {
    next;
}

# Anything else inside a table is an error; stop with a failing status.
sisalla == 1 {
    print "VIRHE: " NR ": Outo rivi: " $0;
    exit 1;
}

# Outside tables: copy the line through unchanged.
sisalla == 0 {
    print $0;
}