.config/coc/extensions/node_modules/coc-prettier/node_modules/chardet/encoding/utf8.js

   1
   2 var Match = require ('../match');
   3
   4 /**
   5  * Charset recognizer for UTF-8
   6  */
   7 module.exports = function() {
   8   this.name = function() {
   9     return 'UTF-8';
  10   };
  11   this.match = function(det) {
  12
  13     var hasBOM = false,
  14       numValid = 0,
  15       numInvalid = 0,
  16       input = det.fRawInput,
  17       trailBytes = 0,
  18       confidence;
  19
  20     if (det.fRawLength >= 3 &&
  21       (input[0] & 0xff) == 0xef && (input[1] & 0xff) == 0xbb && (input[2] & 0xff) == 0xbf) {
  22       hasBOM = true;
  23     }
  24
  25     // Scan for multi-byte sequences
  26     for (var i = 0; i < det.fRawLength; i++) {
  27       var b = input[i];
  28       if ((b & 0x80) == 0)
  29         continue; // ASCII
  30
  31       // Hi bit on char found.  Figure out how long the sequence should be
  32       if ((b & 0x0e0) == 0x0c0) {
  33         trailBytes = 1;
  34       } else if ((b & 0x0f0) == 0x0e0) {
  35         trailBytes = 2;
  36       } else if ((b & 0x0f8) == 0xf0) {
  37         trailBytes = 3;
  38       } else {
  39         numInvalid++;
  40         if (numInvalid > 5)
  41           break;
  42         trailBytes = 0;
  43       }
  44
  45       // Verify that we've got the right number of trail bytes in the sequence
  46       for (;;) {
  47         i++;
  48         if (i >= det.fRawLength)
  49           break;
  50
  51         if ((input[i] & 0xc0) != 0x080) {
  52           numInvalid++;
  53           break;
  54         }
  55         if (--trailBytes == 0) {
  56           numValid++;
  57           break;
  58         }
  59       }
  60     }
  61
  62     // Cook up some sort of confidence score, based on presense of a BOM
  63     //    and the existence of valid and/or invalid multi-byte sequences.
  64     confidence = 0;
  65     if (hasBOM && numInvalid == 0)
  66       confidence = 100;
  67     else if (hasBOM && numValid > numInvalid * 10)
  68       confidence = 80;
  69     else if (numValid > 3 && numInvalid == 0)
  70       confidence = 100;
  71     else if (numValid > 0 && numInvalid == 0)
  72       confidence = 80;
  73     else if (numValid == 0 && numInvalid == 0)
  74       // Plain ASCII.
  75       confidence = 10;
  76     else if (numValid > numInvalid * 10)
  77       // Probably corruput utf-8 data.  Valid sequences aren't likely by chance.
  78       confidence = 25;
  79     else
  80       return null
  81
  82     return new Match(det, this, confidence);
  83   };
  84 };