node_modules/iconv-lite/encodings/utf16.js

   1 "use strict";
   2 var Buffer = require("safer-buffer").Buffer;
   3
   4 // Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
   5
   6 // == UTF16-BE codec. ==========================================================
   7
   8 exports.utf16be = Utf16BECodec;
   9 function Utf16BECodec() {
  10 }
  11
  12 Utf16BECodec.prototype.encoder = Utf16BEEncoder;
  13 Utf16BECodec.prototype.decoder = Utf16BEDecoder;
  14 Utf16BECodec.prototype.bomAware = true;
  15
  16
  17 // -- Encoding
  18
  19 function Utf16BEEncoder() {
  20 }
  21
  22 Utf16BEEncoder.prototype.write = function(str) {
  23     var buf = Buffer.from(str, 'ucs2');
  24     for (var i = 0; i < buf.length; i += 2) {
  25         var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
  26     }
  27     return buf;
  28 }
  29
  30 Utf16BEEncoder.prototype.end = function() {
  31 }
  32
  33
  34 // -- Decoding
  35
  36 function Utf16BEDecoder() {
  37     this.overflowByte = -1;
  38 }
  39
  40 Utf16BEDecoder.prototype.write = function(buf) {
  41     if (buf.length == 0)
  42         return '';
  43
  44     var buf2 = Buffer.alloc(buf.length + 1),
  45         i = 0, j = 0;
  46
  47     if (this.overflowByte !== -1) {
  48         buf2[0] = buf[0];
  49         buf2[1] = this.overflowByte;
  50         i = 1; j = 2;
  51     }
  52
  53     for (; i < buf.length-1; i += 2, j+= 2) {
  54         buf2[j] = buf[i+1];
  55         buf2[j+1] = buf[i];
  56     }
  57
  58     this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
  59
  60     return buf2.slice(0, j).toString('ucs2');
  61 }
  62
  63 Utf16BEDecoder.prototype.end = function() {
  64 }
  65
  66
  67 // == UTF-16 codec =============================================================
  68 // Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
  69 // Defaults to UTF-16LE, as it's prevalent and default in Node.
  70 // http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
  71 // Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
  72
  73 // Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
  74
  75 exports.utf16 = Utf16Codec;
  76 function Utf16Codec(codecOptions, iconv) {
  77     this.iconv = iconv;
  78 }
  79
  80 Utf16Codec.prototype.encoder = Utf16Encoder;
  81 Utf16Codec.prototype.decoder = Utf16Decoder;
  82
  83
  84 // -- Encoding (pass-through)
  85
  86 function Utf16Encoder(options, codec) {
  87     options = options || {};
  88     if (options.addBOM === undefined)
  89         options.addBOM = true;
  90     this.encoder = codec.iconv.getEncoder('utf-16le', options);
  91 }
  92
  93 Utf16Encoder.prototype.write = function(str) {
  94     return this.encoder.write(str);
  95 }
  96
  97 Utf16Encoder.prototype.end = function() {
  98     return this.encoder.end();
  99 }
 100
 101
 102 // -- Decoding
 103
 104 function Utf16Decoder(options, codec) {
 105     this.decoder = null;
 106     this.initialBytes = [];
 107     this.initialBytesLen = 0;
 108
 109     this.options = options || {};
 110     this.iconv = codec.iconv;
 111 }
 112
 113 Utf16Decoder.prototype.write = function(buf) {
 114     if (!this.decoder) {
 115         // Codec is not chosen yet. Accumulate initial bytes.
 116         this.initialBytes.push(buf);
 117         this.initialBytesLen += buf.length;
 118
 119         if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
 120             return '';
 121
 122         // We have enough bytes -> detect endianness.
 123         var buf = Buffer.concat(this.initialBytes),
 124             encoding = detectEncoding(buf, this.options.defaultEncoding);
 125         this.decoder = this.iconv.getDecoder(encoding, this.options);
 126         this.initialBytes.length = this.initialBytesLen = 0;
 127     }
 128
 129     return this.decoder.write(buf);
 130 }
 131
 132 Utf16Decoder.prototype.end = function() {
 133     if (!this.decoder) {
 134         var buf = Buffer.concat(this.initialBytes),
 135             encoding = detectEncoding(buf, this.options.defaultEncoding);
 136         this.decoder = this.iconv.getDecoder(encoding, this.options);
 137
 138         var res = this.decoder.write(buf),
 139             trail = this.decoder.end();
 140
 141         return trail ? (res + trail) : res;
 142     }
 143     return this.decoder.end();
 144 }
 145
 146 function detectEncoding(buf, defaultEncoding) {
 147     var enc = defaultEncoding || 'utf-16le';
 148
 149     if (buf.length >= 2) {
 150         // Check BOM.
 151         if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
 152             enc = 'utf-16be';
 153         else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
 154             enc = 'utf-16le';
 155         else {
 156             // No BOM found. Try to deduce encoding from initial content.
 157             // Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
 158             // So, we count ASCII as if it was LE or BE, and decide from that.
 159             var asciiCharsLE = 0, asciiCharsBE = 0, // Counts of chars in both positions
 160                 _len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
 161
 162             for (var i = 0; i < _len; i += 2) {
 163                 if (buf[i] === 0 && buf[i+1] !== 0) asciiCharsBE++;
 164                 if (buf[i] !== 0 && buf[i+1] === 0) asciiCharsLE++;
 165             }
 166
 167             if (asciiCharsBE > asciiCharsLE)
 168                 enc = 'utf-16be';
 169             else if (asciiCharsBE < asciiCharsLE)
 170                 enc = 'utf-16le';
 171         }
 172     }
 173
 174     return enc;
 175 }
 176
 177