X-Git-Url: https://git.josue.xyz/?p=VSoRC%2F.git;a=blobdiff_plain;f=node_modules%2Fxterm%2Fsrc%2Fcommon%2Finput%2FTextDecoder.ts;fp=node_modules%2Fxterm%2Fsrc%2Fcommon%2Finput%2FTextDecoder.ts;h=397d25a7805e60b35ecfc9265f9ea1afb71fbd22;hp=0000000000000000000000000000000000000000;hb=4339da12467b75fb8b6ca831f4bf0081c485ed2c;hpb=af450fde25a9ccf4767b29254c463ffb8ef25237 diff --git a/node_modules/xterm/src/common/input/TextDecoder.ts b/node_modules/xterm/src/common/input/TextDecoder.ts new file mode 100644 index 0000000..397d25a --- /dev/null +++ b/node_modules/xterm/src/common/input/TextDecoder.ts @@ -0,0 +1,342 @@ +/** + * Copyright (c) 2019 The xterm.js authors. All rights reserved. + * @license MIT + */ + +/** + * Polyfill - Convert UTF32 codepoint into JS string. + * Note: The built-in String.fromCodePoint happens to be much slower + * due to additional sanity checks. We can avoid them since + * we always operate on legal UTF32 (granted by the input decoders) + * and use this faster version instead. + */ +export function stringFromCodePoint(codePoint: number): string { + if (codePoint > 0xFFFF) { + codePoint -= 0x10000; + return String.fromCharCode((codePoint >> 10) + 0xD800) + String.fromCharCode((codePoint % 0x400) + 0xDC00); + } + return String.fromCharCode(codePoint); +} + +/** + * Convert UTF32 char codes into JS string. + * Basically the same as `stringFromCodePoint` but for multiple codepoints + * in a loop (which is a lot faster). + */ +export function utf32ToString(data: Uint32Array, start: number = 0, end: number = data.length): string { + let result = ''; + for (let i = start; i < end; ++i) { + let codepoint = data[i]; + if (codepoint > 0xFFFF) { + // JS strings are encoded as UTF16, thus a non BMP codepoint gets converted into a surrogate pair + // conversion rules: + // - subtract 0x10000 from code point, leaving a 20 bit number + // - add high 10 bits to 0xD800 --> first surrogate + // - add low 10 bits to 0xDC00 --> second surrogate + codepoint -= 0x10000; + result += String.fromCharCode((codepoint >> 10) + 0xD800) + String.fromCharCode((codepoint % 0x400) + 0xDC00); + } else { + result += String.fromCharCode(codepoint); + } + } + return result; +} + +/** + * StringToUtf32 - decodes UTF16 sequences into UTF32 codepoints. + * To keep the decoder in line with JS strings it handles single surrogates as UCS2. + */ +export class StringToUtf32 { + private _interim: number = 0; + + /** + * Clears interim and resets decoder to clean state. + */ + public clear(): void { + this._interim = 0; + } + + /** + * Decode JS string to UTF32 codepoints. + * The methods assumes stream input and will store partly transmitted + * surrogate pairs and decode them with the next data chunk. + * Note: The method does no bound checks for target, therefore make sure + * the provided input data does not exceed the size of `target`. + * Returns the number of written codepoints in `target`. + */ + decode(input: string, target: Uint32Array): number { + const length = input.length; + + if (!length) { + return 0; + } + + let size = 0; + let startPos = 0; + + // handle leftover surrogate high + if (this._interim) { + const second = input.charCodeAt(startPos++); + if (0xDC00 <= second && second <= 0xDFFF) { + target[size++] = (this._interim - 0xD800) * 0x400 + second - 0xDC00 + 0x10000; + } else { + // illegal codepoint (USC2 handling) + target[size++] = this._interim; + target[size++] = second; + } + this._interim = 0; + } + + for (let i = startPos; i < length; ++i) { + const code = input.charCodeAt(i); + // surrogate pair first + if (0xD800 <= code && code <= 0xDBFF) { + if (++i >= length) { + this._interim = code; + return size; + } + const second = input.charCodeAt(i); + if (0xDC00 <= second && second <= 0xDFFF) { + target[size++] = (code - 0xD800) * 0x400 + second - 0xDC00 + 0x10000; + } else { + // illegal codepoint (USC2 handling) + target[size++] = code; + target[size++] = second; + } + continue; + } + target[size++] = code; + } + return size; + } +} + +/** + * Utf8Decoder - decodes UTF8 byte sequences into UTF32 codepoints. + */ +export class Utf8ToUtf32 { + public interim: Uint8Array = new Uint8Array(3); + + /** + * Clears interim bytes and resets decoder to clean state. + */ + public clear(): void { + this.interim.fill(0); + } + + /** + * Decodes UTF8 byte sequences in `input` to UTF32 codepoints in `target`. + * The methods assumes stream input and will store partly transmitted bytes + * and decode them with the next data chunk. + * Note: The method does no bound checks for target, therefore make sure + * the provided data chunk does not exceed the size of `target`. + * Returns the number of written codepoints in `target`. + */ + decode(input: Uint8Array, target: Uint32Array): number { + const length = input.length; + + if (!length) { + return 0; + } + + let size = 0; + let byte1: number; + let byte2: number; + let byte3: number; + let byte4: number; + let codepoint = 0; + let startPos = 0; + + // handle leftover bytes + if (this.interim[0]) { + let discardInterim = false; + let cp = this.interim[0]; + cp &= ((((cp & 0xE0) === 0xC0)) ? 0x1F : (((cp & 0xF0) === 0xE0)) ? 0x0F : 0x07); + let pos = 0; + let tmp: number; + while ((tmp = this.interim[++pos] & 0x3F) && pos < 4) { + cp <<= 6; + cp |= tmp; + } + // missing bytes - read ahead from input + const type = (((this.interim[0] & 0xE0) === 0xC0)) ? 2 : (((this.interim[0] & 0xF0) === 0xE0)) ? 3 : 4; + const missing = type - pos; + while (startPos < missing) { + if (startPos >= length) { + return 0; + } + tmp = input[startPos++]; + if ((tmp & 0xC0) !== 0x80) { + // wrong continuation, discard interim bytes completely + startPos--; + discardInterim = true; + break; + } else { + // need to save so we can continue short inputs in next call + this.interim[pos++] = tmp; + cp <<= 6; + cp |= tmp & 0x3F; + } + } + if (!discardInterim) { + // final test is type dependent + if (type === 2) { + if (cp < 0x80) { + // wrong starter byte + startPos--; + } else { + target[size++] = cp; + } + } else if (type === 3) { + if (cp < 0x0800 || (cp >= 0xD800 && cp <= 0xDFFF)) { + // illegal codepoint + } else { + target[size++] = cp; + } + } else { + if (cp < 0x010000 || cp > 0x10FFFF) { + // illegal codepoint + } else { + target[size++] = cp; + } + } + } + this.interim.fill(0); + } + + // loop through input + const fourStop = length - 4; + let i = startPos; + while (i < length) { + /** + * ASCII shortcut with loop unrolled to 4 consecutive ASCII chars. + * This is a compromise between speed gain for ASCII + * and penalty for non ASCII: + * For best ASCII performance the char should be stored directly into target, + * but even a single attempt to write to target and compare afterwards + * penalizes non ASCII really bad (-50%), thus we load the char into byteX first, + * which reduces ASCII performance by ~15%. + * This trial for ASCII reduces non ASCII performance by ~10% which seems acceptible + * compared to the gains. + * Note that this optimization only takes place for 4 consecutive ASCII chars, + * for any shorter it bails out. Worst case - all 4 bytes being read but + * thrown away due to the last being a non ASCII char (-10% performance). + */ + while (i < fourStop + && !((byte1 = input[i]) & 0x80) + && !((byte2 = input[i + 1]) & 0x80) + && !((byte3 = input[i + 2]) & 0x80) + && !((byte4 = input[i + 3]) & 0x80)) + { + target[size++] = byte1; + target[size++] = byte2; + target[size++] = byte3; + target[size++] = byte4; + i += 4; + } + + // reread byte1 + byte1 = input[i++]; + + // 1 byte + if (byte1 < 0x80) { + target[size++] = byte1; + + // 2 bytes + } else if ((byte1 & 0xE0) === 0xC0) { + if (i >= length) { + this.interim[0] = byte1; + return size; + } + byte2 = input[i++]; + if ((byte2 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + codepoint = (byte1 & 0x1F) << 6 | (byte2 & 0x3F); + if (codepoint < 0x80) { + // wrong starter byte + i--; + continue; + } + target[size++] = codepoint; + + // 3 bytes + } else if ((byte1 & 0xF0) === 0xE0) { + if (i >= length) { + this.interim[0] = byte1; + return size; + } + byte2 = input[i++]; + if ((byte2 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + if (i >= length) { + this.interim[0] = byte1; + this.interim[1] = byte2; + return size; + } + byte3 = input[i++]; + if ((byte3 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F); + if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) { + // illegal codepoint, no i-- here + continue; + } + target[size++] = codepoint; + + // 4 bytes + } else if ((byte1 & 0xF8) === 0xF0) { + if (i >= length) { + this.interim[0] = byte1; + return size; + } + byte2 = input[i++]; + if ((byte2 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + if (i >= length) { + this.interim[0] = byte1; + this.interim[1] = byte2; + return size; + } + byte3 = input[i++]; + if ((byte3 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + if (i >= length) { + this.interim[0] = byte1; + this.interim[1] = byte2; + this.interim[2] = byte3; + return size; + } + byte4 = input[i++]; + if ((byte4 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + codepoint = (byte1 & 0x07) << 18 | (byte2 & 0x3F) << 12 | (byte3 & 0x3F) << 6 | (byte4 & 0x3F); + if (codepoint < 0x010000 || codepoint > 0x10FFFF) { + // illegal codepoint, no i-- here + continue; + } + target[size++] = codepoint; + } else { + // illegal byte, just skip + } + } + return size; + } +}