node_modules/node-pty/deps/winpty/src/agent/UnicodeEncodingTest.cc

   1 // Copyright (c) 2015 Ryan Prichard
   2 //
   3 // Permission is hereby granted, free of charge, to any person obtaining a copy
   4 // of this software and associated documentation files (the "Software"), to
   5 // deal in the Software without restriction, including without limitation the
   6 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
   7 // sell copies of the Software, and to permit persons to whom the Software is
   8 // furnished to do so, subject to the following conditions:
   9 //
  10 // The above copyright notice and this permission notice shall be included in
  11 // all copies or substantial portions of the Software.
  12 //
  13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19 // IN THE SOFTWARE.
  20
  21 // Encode every code-point using this module and verify that it matches the
  22 // encoding generated using Windows WideCharToMultiByte.
  23
  24 #include "UnicodeEncoding.h"
  25
  26 #include <windows.h>
  27 #include <assert.h>
  28 #include <stdio.h>
  29 #include <string.h>
  30 #include <time.h>
  31
  32 static void correctnessByCode()
  33 {
  34     char mbstr1[4];
  35     char mbstr2[4];
  36     wchar_t wch[2];
  37     for (unsigned int code = 0; code < 0x110000; ++code) {
  38
  39         // Surrogate pair reserved region.
  40         const bool isReserved = (code >= 0xD800 && code <= 0xDFFF);
  41
  42         int mblen1 = encodeUtf8(mbstr1, code);
  43         if (isReserved ? mblen1 != 0 : mblen1 <= 0) {
  44             printf("Error: 0x%04X: mblen1=%d\n", code, mblen1);
  45             continue;
  46         }
  47
  48         int wlen = encodeUtf16(wch, code);
  49         if (isReserved ? wlen != 0 : wlen <= 0) {
  50             printf("Error: 0x%04X: wlen=%d\n", code, wlen);
  51             continue;
  52         }
  53
  54         if (isReserved) {
  55             continue;
  56         }
  57
  58         if (mblen1 != utf8CharLength(mbstr1[0])) {
  59             printf("Error: 0x%04X: mblen1=%d, utf8CharLength(mbstr1[0])=%d\n",
  60                 code, mblen1, utf8CharLength(mbstr1[0]));
  61             continue;
  62         }
  63
  64         if (code != decodeUtf8(mbstr1)) {
  65             printf("Error: 0x%04X: decodeUtf8(mbstr1)=%u\n",
  66                 code, decodeUtf8(mbstr1));
  67             continue;
  68         }
  69
  70         int mblen2 = WideCharToMultiByte(CP_UTF8, 0, wch, wlen, mbstr2, 4, NULL, NULL);
  71         if (mblen1 != mblen2) {
  72             printf("Error: 0x%04X: mblen1=%d, mblen2=%d\n", code, mblen1, mblen2);
  73             continue;
  74         }
  75
  76         if (memcmp(mbstr1, mbstr2, mblen1) != 0) {
  77             printf("Error: 0x%04x: encodings are different\n", code);
  78             continue;
  79         }
  80     }
  81 }
  82
  83 static const char *encodingStr(char (&output)[128], char (&buf)[4])
  84 {
  85     sprintf(output, "Encoding %02X %02X %02X %02X",
  86         static_cast<uint8_t>(buf[0]),
  87         static_cast<uint8_t>(buf[1]),
  88         static_cast<uint8_t>(buf[2]),
  89         static_cast<uint8_t>(buf[3]));
  90     return output;
  91 }
  92
  93 // This test can take a couple of minutes to run.
  94 static void correctnessByUtf8Encoding()
  95 {
  96     for (uint64_t encoding = 0; encoding <= 0xFFFFFFFF; ++encoding) {
  97
  98         char mb[4];
  99         mb[0] = encoding;
 100         mb[1] = encoding >> 8;
 101         mb[2] = encoding >> 16;
 102         mb[3] = encoding >> 24;
 103
 104         const int mblen = utf8CharLength(mb[0]);
 105         if (mblen == 0) {
 106             continue;
 107         }
 108
 109         // Test this module.
 110         const uint32_t code1 = decodeUtf8(mb);
 111         wchar_t ws1[2] = {};
 112         const int wslen1 = encodeUtf16(ws1, code1);
 113
 114         // Test using Windows.  We can't decode a codepoint directly; we have
 115         // to do UTF8->UTF16, then decode the surrogate pair.
 116         wchar_t ws2[2] = {};
 117         const int wslen2 = MultiByteToWideChar(
 118             CP_UTF8, MB_ERR_INVALID_CHARS, mb, mblen, ws2, 2);
 119         const uint32_t code2 =
 120             (wslen2 == 1 ? ws2[0] :
 121              wslen2 == 2 ? decodeSurrogatePair(ws2[0], ws2[1]) :
 122              static_cast<uint32_t>(-1));
 123
 124         // Verify that the two implementations match.
 125         char prefix[128];
 126         if (code1 != code2) {
 127             printf("%s: code1=0x%04x code2=0x%04x\n",
 128                 encodingStr(prefix, mb),
 129                 code1, code2);
 130             continue;
 131         }
 132         if (wslen1 != wslen2) {
 133             printf("%s: wslen1=%d wslen2=%d\n",
 134                 encodingStr(prefix, mb),
 135                 wslen1, wslen2);
 136             continue;
 137         }
 138         if (memcmp(ws1, ws2, wslen1 * sizeof(wchar_t)) != 0) {
 139             printf("%s: ws1 != ws2\n", encodingStr(prefix, mb));
 140             continue;
 141         }
 142     }
 143 }
 144
 145 wchar_t g_wch_TEST[] = { 0xD840, 0xDC00 };
 146 char g_ch_TEST[4];
 147 wchar_t *volatile g_pwch = g_wch_TEST;
 148 char *volatile g_pch = g_ch_TEST;
 149 unsigned int volatile g_code = 0xA2000;
 150
 151 static void performance()
 152 {
 153     {
 154         clock_t start = clock();
 155         for (long long i = 0; i < 250000000LL; ++i) {
 156             int mblen = WideCharToMultiByte(CP_UTF8, 0, g_pwch, 2, g_pch, 4, NULL, NULL);
 157             assert(mblen == 4);
 158         }
 159         clock_t stop = clock();
 160         printf("%.3fns per char\n", (double)(stop - start) / CLOCKS_PER_SEC * 4.0);
 161     }
 162
 163     {
 164         clock_t start = clock();
 165         for (long long i = 0; i < 3000000000LL; ++i) {
 166             int mblen = encodeUtf8(g_pch, g_code);
 167             assert(mblen == 4);
 168         }
 169         clock_t stop = clock();
 170         printf("%.3fns per char\n", (double)(stop - start) / CLOCKS_PER_SEC / 3.0);
 171     }
 172 }
 173
 174 int main()
 175 {
 176     printf("Testing correctnessByCode...\n");
 177     fflush(stdout);
 178     correctnessByCode();
 179
 180     printf("Testing correctnessByUtf8Encoding... (may take a couple minutes)\n");
 181     fflush(stdout);
 182     correctnessByUtf8Encoding();
 183
 184     printf("Testing performance...\n");
 185     fflush(stdout);
 186     performance();
 187
 188     return 0;
 189 }