Package: libc6 Version: 2.33-8 Severity: important Dear Maintainer, Consider the following reproducer: -- >8 -- #include <locale.h> #include <stdio.h> #include <wchar.h> int main(int argc, char **) { if(argc > 1) fprintf(stderr, "loc=%s\n", setlocale(LC_ALL, "")); for(int i = 0; i <= 0xFF; ++i) { char bs[] = {i, 0}; mbstate_t ctx = {}; wchar_t wc = -1; printf("%02x: %d, ", i, mbrtowc(&wc, bs, 1, &ctx)); printf("%ld\n", wc); } } -- >8 -- Yielding the following output: -- >8 -- $ ./b | paste - - - - - - - - 00: 0, 0 01: 1, 1 02: 1, 2 03: 1, 3 04: 1, 4 05: 1, 5 06: 1, 6 07: 1, 7 08: 1, 8 09: 1, 9 0a: 1, 10 0b: 1, 11 0c: 1, 12 0d: 1, 13 0e: 1, 14 0f: 1, 15 10: 1, 16 11: 1, 17 12: 1, 18 13: 1, 19 14: 1, 20 15: 1, 21 16: 1, 22 17: 1, 23 18: 1, 24 19: 1, 25 1a: 1, 26 1b: 1, 27 1c: 1, 28 1d: 1, 29 1e: 1, 30 1f: 1, 31 20: 1, 32 21: 1, 33 22: 1, 34 23: 1, 35 24: 1, 36 25: 1, 37 26: 1, 38 27: 1, 39 28: 1, 40 29: 1, 41 2a: 1, 42 2b: 1, 43 2c: 1, 44 2d: 1, 45 2e: 1, 46 2f: 1, 47 30: 1, 48 31: 1, 49 32: 1, 50 33: 1, 51 34: 1, 52 35: 1, 53 36: 1, 54 37: 1, 55 38: 1, 56 39: 1, 57 3a: 1, 58 3b: 1, 59 3c: 1, 60 3d: 1, 61 3e: 1, 62 3f: 1, 63 40: 1, 64 41: 1, 65 42: 1, 66 43: 1, 67 44: 1, 68 45: 1, 69 46: 1, 70 47: 1, 71 48: 1, 72 49: 1, 73 4a: 1, 74 4b: 1, 75 4c: 1, 76 4d: 1, 77 4e: 1, 78 4f: 1, 79 50: 1, 80 51: 1, 81 52: 1, 82 53: 1, 83 54: 1, 84 55: 1, 85 56: 1, 86 57: 1, 87 58: 1, 88 59: 1, 89 5a: 1, 90 5b: 1, 91 5c: 1, 92 5d: 1, 93 5e: 1, 94 5f: 1, 95 60: 1, 96 61: 1, 97 62: 1, 98 63: 1, 99 64: 1, 100 65: 1, 101 66: 1, 102 67: 1, 103 68: 1, 104 69: 1, 105 6a: 1, 106 6b: 1, 107 6c: 1, 108 6d: 1, 109 6e: 1, 110 6f: 1, 111 70: 1, 112 71: 1, 113 72: 1, 114 73: 1, 115 74: 1, 116 75: 1, 117 76: 1, 118 77: 1, 119 78: 1, 120 79: 1, 121 7a: 1, 122 7b: 1, 123 7c: 1, 124 7d: 1, 125 7e: 1, 126 7f: 1, 127 80: -1, -1 81: -1, -1 82: -1, -1 83: -1, -1 84: -1, -1 85: -1, -1 86: -1, -1 87: -1, -1 88: -1, -1 89: -1, -1 8a: -1, -1 8b: -1, -1 8c: -1, -1 8d: -1, -1 8e: -1, -1 8f: -1, -1 90: -1, -1 91: -1, -1 92: -1, -1 93: -1, -1 
94: -1, -1 95: -1, -1 96: -1, -1 97: -1, -1 98: -1, -1 99: -1, -1 9a: -1, -1 9b: -1, -1 9c: -1, -1 9d: -1, -1 9e: -1, -1 9f: -1, -1 a0: -1, -1 a1: -1, -1 a2: -1, -1 a3: -1, -1 a4: -1, -1 a5: -1, -1 a6: -1, -1 a7: -1, -1 a8: -1, -1 a9: -1, -1 aa: -1, -1 ab: -1, -1 ac: -1, -1 ad: -1, -1 ae: -1, -1 af: -1, -1 b0: -1, -1 b1: -1, -1 b2: -1, -1 b3: -1, -1 b4: -1, -1 b5: -1, -1 b6: -1, -1 b7: -1, -1 b8: -1, -1 b9: -1, -1 ba: -1, -1 bb: -1, -1 bc: -1, -1 bd: -1, -1 be: -1, -1 bf: -1, -1 c0: -1, -1 c1: -1, -1 c2: -1, -1 c3: -1, -1 c4: -1, -1 c5: -1, -1 c6: -1, -1 c7: -1, -1 c8: -1, -1 c9: -1, -1 ca: -1, -1 cb: -1, -1 cc: -1, -1 cd: -1, -1 ce: -1, -1 cf: -1, -1 d0: -1, -1 d1: -1, -1 d2: -1, -1 d3: -1, -1 d4: -1, -1 d5: -1, -1 d6: -1, -1 d7: -1, -1 d8: -1, -1 d9: -1, -1 da: -1, -1 db: -1, -1 dc: -1, -1 dd: -1, -1 de: -1, -1 df: -1, -1 e0: -1, -1 e1: -1, -1 e2: -1, -1 e3: -1, -1 e4: -1, -1 e5: -1, -1 e6: -1, -1 e7: -1, -1 e8: -1, -1 e9: -1, -1 ea: -1, -1 eb: -1, -1 ec: -1, -1 ed: -1, -1 ee: -1, -1 ef: -1, -1 f0: -1, -1 f1: -1, -1 f2: -1, -1 f3: -1, -1 f4: -1, -1 f5: -1, -1 f6: -1, -1 f7: -1, -1 f8: -1, -1 f9: -1, -1 fa: -1, -1 fb: -1, -1 fc: -1, -1 fd: -1, -1 fe: -1, -1 ff: -1, -1 $ LC_ALL=POSIX ./b _ | paste - - - - - - - - loc=C 00: 0, 0 01: 1, 1 02: 1, 2 03: 1, 3 04: 1, 4 05: 1, 5 06: 1, 6 07: 1, 7 08: 1, 8 09: 1, 9 0a: 1, 10 0b: 1, 11 0c: 1, 12 0d: 1, 13 0e: 1, 14 0f: 1, 15 10: 1, 16 11: 1, 17 12: 1, 18 13: 1, 19 14: 1, 20 15: 1, 21 16: 1, 22 17: 1, 23 18: 1, 24 19: 1, 25 1a: 1, 26 1b: 1, 27 1c: 1, 28 1d: 1, 29 1e: 1, 30 1f: 1, 31 20: 1, 32 21: 1, 33 22: 1, 34 23: 1, 35 24: 1, 36 25: 1, 37 26: 1, 38 27: 1, 39 28: 1, 40 29: 1, 41 2a: 1, 42 2b: 1, 43 2c: 1, 44 2d: 1, 45 2e: 1, 46 2f: 1, 47 30: 1, 48 31: 1, 49 32: 1, 50 33: 1, 51 34: 1, 52 35: 1, 53 36: 1, 54 37: 1, 55 38: 1, 56 39: 1, 57 3a: 1, 58 3b: 1, 59 3c: 1, 60 3d: 1, 61 3e: 1, 62 3f: 1, 63 40: 1, 64 41: 1, 65 42: 1, 66 43: 1, 67 44: 1, 68 45: 1, 69 46: 1, 70 47: 1, 71 48: 1, 72 49: 1, 73 4a: 1, 74 4b: 1, 75 4c: 1, 76 
4d: 1, 77 4e: 1, 78 4f: 1, 79 50: 1, 80 51: 1, 81 52: 1, 82 53: 1, 83 54: 1, 84 55: 1, 85 56: 1, 86 57: 1, 87 58: 1, 88 59: 1, 89 5a: 1, 90 5b: 1, 91 5c: 1, 92 5d: 1, 93 5e: 1, 94 5f: 1, 95 60: 1, 96 61: 1, 97 62: 1, 98 63: 1, 99 64: 1, 100 65: 1, 101 66: 1, 102 67: 1, 103 68: 1, 104 69: 1, 105 6a: 1, 106 6b: 1, 107 6c: 1, 108 6d: 1, 109 6e: 1, 110 6f: 1, 111 70: 1, 112 71: 1, 113 72: 1, 114 73: 1, 115 74: 1, 116 75: 1, 117 76: 1, 118 77: 1, 119 78: 1, 120 79: 1, 121 7a: 1, 122 7b: 1, 123 7c: 1, 124 7d: 1, 125 7e: 1, 126 7f: 1, 127 80: -1, -1 81: -1, -1 82: -1, -1 83: -1, -1 84: -1, -1 85: -1, -1 86: -1, -1 87: -1, -1 88: -1, -1 89: -1, -1 8a: -1, -1 8b: -1, -1 8c: -1, -1 8d: -1, -1 8e: -1, -1 8f: -1, -1 90: -1, -1 91: -1, -1 92: -1, -1 93: -1, -1 94: -1, -1 95: -1, -1 96: -1, -1 97: -1, -1 98: -1, -1 99: -1, -1 9a: -1, -1 9b: -1, -1 9c: -1, -1 9d: -1, -1 9e: -1, -1 9f: -1, -1 a0: -1, -1 a1: -1, -1 a2: -1, -1 a3: -1, -1 a4: -1, -1 a5: -1, -1 a6: -1, -1 a7: -1, -1 a8: -1, -1 a9: -1, -1 aa: -1, -1 ab: -1, -1 ac: -1, -1 ad: -1, -1 ae: -1, -1 af: -1, -1 b0: -1, -1 b1: -1, -1 b2: -1, -1 b3: -1, -1 b4: -1, -1 b5: -1, -1 b6: -1, -1 b7: -1, -1 b8: -1, -1 b9: -1, -1 ba: -1, -1 bb: -1, -1 bc: -1, -1 bd: -1, -1 be: -1, -1 bf: -1, -1 c0: -1, -1 c1: -1, -1 c2: -1, -1 c3: -1, -1 c4: -1, -1 c5: -1, -1 c6: -1, -1 c7: -1, -1 c8: -1, -1 c9: -1, -1 ca: -1, -1 cb: -1, -1 cc: -1, -1 cd: -1, -1 ce: -1, -1 cf: -1, -1 d0: -1, -1 d1: -1, -1 d2: -1, -1 d3: -1, -1 d4: -1, -1 d5: -1, -1 d6: -1, -1 d7: -1, -1 d8: -1, -1 d9: -1, -1 da: -1, -1 db: -1, -1 dc: -1, -1 dd: -1, -1 de: -1, -1 df: -1, -1 e0: -1, -1 e1: -1, -1 e2: -1, -1 e3: -1, -1 e4: -1, -1 e5: -1, -1 e6: -1, -1 e7: -1, -1 e8: -1, -1 e9: -1, -1 ea: -1, -1 eb: -1, -1 ec: -1, -1 ed: -1, -1 ee: -1, -1 ef: -1, -1 f0: -1, -1 f1: -1, -1 f2: -1, -1 f3: -1, -1 f4: -1, -1 f5: -1, -1 f6: -1, -1 f7: -1, -1 f8: -1, -1 f9: -1, -1 fa: -1, -1 fb: -1, -1 fc: -1, -1 fd: -1, -1 fe: -1, -1 ff: -1, -1 -- >8 -- This breaks all programs that expect to 
process text/data portably, since in LC_ALL=C half of all bytes collapse to one character (for sort this means that they all collate equally, &c., &c.)! Consider a diff of XBD 6.2 ("Character Encoding"), Issue 7 vs Issue 7 TC2: -- >8 -- @@ -1768,9 +1664,13 @@

6.2 Character Encoding

-

The POSIX locale contains the characters in Portable Character Set, which have the properties listed
-in LC_CTYPE. In other locales, the presence, meaning, and
-representation of any additional characters are locale-specific.

+

The POSIX locale shall contain 256 single-byte characters including the characters in Portable Character
+Set and Non-Portable Control Characters, which have the properties listed in LC_CTYPE. It is unspecified whether characters not listed in those two tables
+are classified as punct or cntrl, or neither. Other locales shall contain the characters in Portable Character Set and may contain any or all of the control characters identified in Non-Portable Control Characters; the presence, meaning, and representation of any additional characters are
+locale-specific.

In locales other than the POSIX locale, a character may have a state-dependent encoding. There are two types of these encodings:

-- >8 -- This text is widely backed up by global changes made later, originating from bug 674: > An invalid character sequence is detected. In the POSIX locale an EILSEQ error cannot occur since all byte values are valid characters.[/CX] > In the POSIX locale each byte is a valid single-byte character, and therefore this problem is avoided. &c. This text is unchanged in Issue 8 Draft 2.1. Agonised, наб -- System Information: Debian Release: bookworm/sid APT prefers unstable APT policy: (500, 'unstable') Architecture: x32 (x86_64) Foreign Architectures: amd64, i386 Kernel: Linux 5.18.0-3-amd64 (SMP w/2 CPU threads; PREEMPT) Kernel taint flags: TAINT_PROPRIETARY_MODULE, TAINT_OOT_MODULE, TAINT_UNSIGNED_MODULE Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE not set Shell: /bin/sh linked to /usr/bin/dash Init: systemd (via /run/systemd/system) LSM: AppArmor: enabled Versions of packages libc6 depends on: ii libgcc-s1 12.1.0-2 Versions of packages libc6 recommends: ii libidn2-0 2.3.3-1 Versions of packages libc6 suggests: ii debconf [debconf-2.0] 1.5.79 pn glibc-doc ii libc-l10n 2.33-8 ii libnss-nis 3.1-4 ii libnss-nisplus 1.3-4 ii locales 2.33-8 -- debconf information excluded