Hello! You have come here looking for the meaning of
User:Hippietrail/domtokenizer.js. On DICTIOUS you will find not only all the dictionary meanings of
User:Hippietrail/domtokenizer.js, but also its etymology, its characteristics, and how
User:Hippietrail/domtokenizer.js is written in the singular and the plural. Everything you need to know about
User:Hippietrail/domtokenizer.js is here. The definition of
User:Hippietrail/domtokenizer.js will help you be more precise and correct when speaking or writing. Knowing the definition of
User:Hippietrail/domtokenizer.js, as well as the definitions of other words, enriches your vocabulary and gives you more and better linguistic resources.
// Needs ES2015 generators (function* / yield*). The original targeted the
// JavaScript 1.7 legacy generator syntax, which current engines no longer parse.

/**
 * Token reader over the DOM subtree rooted at `startnode`.
 *
 * Keeps one token of lookahead and supports a single level of push-back.
 * Intended to be called with `new`.
 *
 * Public fields:
 *   tok      - the current token (null before the first gettok() and at the end)
 *   lasttok  - the token that preceded `tok` (reset to null by ungettok())
 *   nexttok  - the lookahead token (null once the input is exhausted)
 *
 * @param {Node} startnode - root of the subtree to tokenize
 */
function domtokenizer(startnode) {
  // public
  this.lasttok = null;
  this.tok = null;
  this.nexttok = null;
  // private
  this.ungot = false;
  this.nextnext = null;
  this.eof = false;
  this.domgen = domgenerator(startnode);

  /**
   * Advance by one token.
   * @returns {?Object} the new current token, or null past the end of input
   */
  this.gettok = function () {
    this.lasttok = this.tok;
    this.tok = this.nexttok;
    if (this.ungot) {
      // Re-deliver the lookahead that ungettok() stashed away.
      this.nexttok = this.nextnext;
      this.ungot = false;
    } else if (!this.eof) {
      const step = this.domgen.next();
      this.nexttok = step.done ? null : step.value;
      if (this.nexttok == null) {
        this.eof = true;
      }
    }
    // else: because we have one token of lookahead we need to be able to go
    // one token past the end, so at EOF only the shift above happens.
    return this.tok;
  };

  /**
   * Push the current token back so the next gettok() returns it again.
   * Only one token can be pushed back: a second call before gettok()
   * overwrites the stashed lookahead.
   */
  this.ungettok = function () {
    this.ungot = true;
    this.nextnext = this.nexttok;
    this.nexttok = this.tok;
    this.tok = this.lasttok;
    this.lasttok = null;
  };

  // Prime the lookahead: afterwards `tok` is null and `nexttok` is the first token.
  this.gettok();
}

/**
 * Walks the subtree rooted at `startnode` in document order and yields tokens:
 *   { t: "s", n: node }  - an element is entered
 *   { t: "e", n: node }  - an element is left
 *   { t: "t", x: string, isWhite: boolean, isEOL?: true }  - a piece of a text node
 *   { t: "o", n: node }  - any other node (comments etc)
 *
 * The generator finishes normally at the end of the subtree; it does not
 * yield a trailing null sentinel.
 *
 * @param {Node} startnode - root of the subtree; the walk never leaves it
 */
function* domgenerator(startnode) {
  let node = startnode;
  while (node != null) {
    // EMIT
    if (node.nodeType === 1) {
      // tag nodes
      yield { t: 's', n: node };
    } else if (node.nodeType === 3) {
      // text nodes
      yield* texttokenizer(node.nodeValue);
    } else {
      // other nodes (comments etc)
      yield { t: 'o', n: node };
    }

    // WALK
    // child of this node
    if (node.firstChild) {
      node = node.firstChild;
      continue;
    }
    // close this tag then go to sibling or parent
    while (node != null) {
      if (node.nodeType === 1) {
        yield { t: 'e', n: node };
      }
      // Stop at the start node whatever its type. Testing this only for
      // elements let a text/comment/document/fragment start node walk out
      // into its siblings and ancestors.
      if (node === startnode) {
        node = null;
        break;
      }
      if (node.nextSibling) {
        node = node.nextSibling;
        break;
      }
      node = node.parentNode;
    }
  }

  /**
   * Splits `text` into "t" tokens: each of , : ; ( ) is a token on its own;
   * runs of whitespace, of language-name characters, and of Cyrillic, Hebrew,
   * Arabic or CJK characters are one token each; any other character is a
   * single-character token.
   *
   * Scanning is per UTF-16 code unit, so supplementary-plane characters
   * (e.g. CJK Extension B, U+20000 and up) come out as two one-unit tokens.
   *
   * @param {string} text
   */
  function* texttokenizer(text) {
    // NOTE(review): both patterns were empty in the copy this code was
    // recovered from; they are reconstructed from the adjacent comments and
    // must be confirmed against the upstream script.
    // MediaWiki converts some spaces to non-breaking spaces near punctuation.
    // This is a feature for the French language and an unexpected surprise
    // for the rest of us!
    const WHITE = /[\s\u00a0]/;
    // Characters used in language names (presumably Latin letters incl.
    // accented ones, apostrophe and hyphen - TODO confirm).
    const NAMECHAR = /[A-Za-z\u00C0-\u024F'-]/;
    const SINGLES = ',:;()';

    // Run classes, tried in this order against the first character of a token.
    const runClasses = [
      { white: true, test: (ch) => WHITE.test(ch) },
      { white: false, test: (ch) => NAMECHAR.test(ch) },
      { white: false, test: (ch, code) => inCyrillic(code) },
      { white: false, test: (ch, code) => inHebrew(code) },
      { white: false, test: (ch, code) => inArabic(code) },
      { white: false, test: (ch, code) => inCJK(code) },
    ];

    let pos = 0;
    while (pos < text.length) {
      const start = pos;
      const first = text.charAt(pos);
      const firstCode = text.charCodeAt(pos);
      pos++;

      let isWhite = false;
      if (!SINGLES.includes(first)) {
        const cls = runClasses.find((r) => r.test(first, firstCode));
        if (cls) {
          isWhite = cls.white;
          // Extend the token over every following character of the same class.
          while (pos < text.length && cls.test(text.charAt(pos), text.charCodeAt(pos))) {
            pos++;
          }
        }
      }

      const piece = text.slice(start, pos);
      const retval = { t: 't', x: piece, isWhite: isWhite };
      // Only a whitespace run that is exactly one line break counts as EOL.
      if (isWhite && (piece === '\r' || piece === '\n' || piece === '\r\n')) {
        retval.isEOL = true;
      }
      yield retval;
    }

    function inCyrillic(c) {
      return c >= 0x0400 && c <= 0x04FF;
    }

    function inHebrew(c) {
      return c >= 0x0590 && c <= 0x05FF;
    }

    function inArabic(c) {
      return c >= 0x0600 && c <= 0x06FF;
    }

    function inCJK(c) {
      return (c >= 0x2E80 && c <= 0x303F)
        || (c >= 0x31C0 && c <= 0x31EF)
        || (c >= 0x3200 && c <= 0x4DBF)
        || (c >= 0x4E00 && c <= 0x9FFF)
        || (c >= 0xF900 && c <= 0xFAFF)
        || (c >= 0xFE30 && c <= 0xFE4F);
    }
  }
}