1 module hunt.system.Locale;
2
3 import hunt.Functions;
4 import hunt.logging;
5
6 import core.stdc.locale;
7 import std.format;
8 import std.process;
9 import std.string;
10
11 // dfmt off
12 version(Posix) {
13
/**
 * `nl_item` selector that makes `nl_langinfo` return the codeset (character
 * encoding) name of the current locale.
 *
 * The numeric value is defined per platform by the C header `<langinfo.h>`:
 * it is 14 on Linux (glibc and musl) but 0 on macOS and FreeBSD, where item 14
 * selects an abbreviated day name instead. The constant keeps its historical
 * glibc name on every platform so existing references keep compiling.
 */
version (linux)
    enum _NL_CTYPE_CODESET_NAME = 14;
else version (OSX)
    enum _NL_CTYPE_CODESET_NAME = 0;
else version (FreeBSD)
    enum _NL_CTYPE_CODESET_NAME = 0;
else
    // NOTE(review): value carried over unchanged for platforms that have not
    // been checked -- confirm against that platform's <langinfo.h>.
    enum _NL_CTYPE_CODESET_NAME = 14;

/// Portable spelling of the selector, matching the C macro name.
alias CODESET = _NL_CTYPE_CODESET_NAME;
16
// Binding for the C library's nl_langinfo(3).
//
// Deliberately NOT declared `pure`: the result depends on the process-wide
// locale installed by setlocale, so the compiler must not be allowed to reuse
// or drop calls. The returned pointer refers to storage owned by the C
// library; callers should copy the text before calling into libc again.
extern(C) nothrow @nogc {
    char* nl_langinfo(int __item);
}
20
21 version(linux) {
/*
 * Linux table that expands a partial locale name (a bare language code or a
 * spelled-out language name) into a full locale name.
 */
enum string[string] localeAliases = [
    // Two-letter language codes.
    "ar": "ar_EG", "be": "be_BY", "bg": "bg_BG", "br": "br_FR",
    "ca": "ca_ES", "cs": "cs_CZ", "cz": "cs_CZ", "da": "da_DK",
    "de": "de_DE", "el": "el_GR", "en": "en_US",
    "eo": "eo", // no country for Esperanto
    "es": "es_ES", "et": "et_EE", "eu": "eu_ES", "fi": "fi_FI",
    "fr": "fr_FR", "ga": "ga_IE", "gl": "gl_ES", "he": "iw_IL",
    "hr": "hr_HR",
    "hs": "en_US", // used on Linux: not clear what it stands for
    "hu": "hu_HU", "id": "in_ID", "in": "in_ID", "is": "is_IS",
    "it": "it_IT", "iw": "iw_IL", "ja": "ja_JP", "kl": "kl_GL",
    "ko": "ko_KR", "lt": "lt_LT", "lv": "lv_LV", "mk": "mk_MK",
    "nl": "nl_NL", "no": "no_NO", "pl": "pl_PL", "pt": "pt_PT",
    "ro": "ro_RO", "ru": "ru_RU", "se": "se_NO", "sk": "sk_SK",
    "sl": "sl_SI", "sq": "sq_AL", "sr": "sr_CS", "su": "fi_FI",
    "sv": "sv_SE", "th": "th_TH", "tr": "tr_TR",
    "ua": "en_US", // used on Linux: not clear what it stands for
    "uk": "uk_UA", "vi": "vi_VN", "wa": "wa_BE", "zh": "zh_CN",

    // Spelled-out language names.
    "bokmal": "nb_NO", "bokm\xE5l": "nb_NO", "catalan": "ca_ES",
    "croatian": "hr_HR", "czech": "cs_CZ", "danish": "da_DK",
    "dansk": "da_DK", "deutsch": "de_DE", "dutch": "nl_NL",
    "eesti": "et_EE", "estonian": "et_EE", "finnish": "fi_FI",
    "fran\xE7\x61is": "fr_FR", "french": "fr_FR", "galego": "gl_ES",
    "galician": "gl_ES", "german": "de_DE", "greek": "el_GR",
    "hebrew": "iw_IL", "hrvatski": "hr_HR", "hungarian": "hu_HU",
    "icelandic": "is_IS", "italian": "it_IT", "japanese": "ja_JP",
    "korean": "ko_KR", "lithuanian": "lt_LT", "norwegian": "no_NO",
    "nynorsk": "nn_NO", "polish": "pl_PL", "portuguese": "pt_PT",
    "romanian": "ro_RO", "russian": "ru_RU", "slovak": "sk_SK",
    "slovene": "sl_SI", "slovenian": "sl_SI", "spanish": "es_ES",
    "swedish": "sv_SE", "thai": "th_TH", "turkish": "tr_TR"
];
125
126
/*
 * Linux table mapping a platform language string to the language code
 * reported by this module.
 */
enum string[string] languageNames = [
    "C": "en", "POSIX": "en", "cz": "cs", "he": "iw",
    "hs": "en", // used on Linux : not clear what it stands for
    "id": "in",
    "sh": "sr", // sh is deprecated
    "su": "fi",
    "ua": "en", // used on Linux : not clear what it stands for

    // Spelled-out language names.
    "catalan": "ca", "croatian": "hr", "czech": "cs", "danish": "da",
    "dansk": "da", "deutsch": "de", "dutch": "nl", "finnish": "fi",
    "fran\xE7\x61is": "fr", "french": "fr", "german": "de", "greek": "el",
    "hebrew": "he", "hrvatski": "hr", "hungarian": "hu", "icelandic": "is",
    "italian": "it", "japanese": "ja", "norwegian": "no", "polish": "pl",
    "portuguese": "pt", "romanian": "ro", "russian": "ru", "slovak": "sk",
    "slovene": "sl", "slovenian": "sl", "spanish": "es", "swedish": "sv",
    "turkish": "tr"
];
174
/*
 * Linux table mapping a locale modifier string to a four-letter script name.
 */
enum string[string] scriptNames = [
    // Descriptive modifiers.
    "cyrillic": "Cyrl", "devanagari": "Deva", "iqtelif": "Latn", "latin": "Latn",

    // Script codes that map to themselves.
    "Arab": "Arab", "Cyrl": "Cyrl", "Deva": "Deva", "Ethi": "Ethi", "Hans": "Hans",
    "Hant": "Hant", "Latn": "Latn", "Sund": "Sund", "Syrc": "Syrc", "Tfng": "Tfng"
];
194
/*
 * Linux table mapping a platform country string to the country code reported
 * by this module.
 */
enum string[string] countryNames = [
    // used on Linux : not clear what it stands for
    "RN": "US",
    // YU has been removed from ISO 3166
    "YU": "CS"
];
202
203 } else {
204
/*
 * Non-Linux Posix table that expands a partial locale name or a platform
 * locale alias into a full locale name. Some targets carry an encoding
 * (".Big5") or a modifier ("@nynorsk") of their own.
 */
enum string[string] localeAliases = [
    // Two-letter language codes.
    "ar": "ar_EG", "be": "be_BY", "bg": "bg_BG", "br": "br_FR",
    "ca": "ca_ES", "cs": "cs_CZ", "cz": "cs_CZ", "da": "da_DK",
    "de": "de_DE", "el": "el_GR", "en": "en_US",
    "eo": "eo", // no country for Esperanto
    "es": "es_ES", "et": "et_EE", "eu": "eu_ES", "fi": "fi_FI",
    "fr": "fr_FR", "ga": "ga_IE", "gl": "gl_ES", "he": "iw_IL",
    "hr": "hr_HR", "hu": "hu_HU", "id": "in_ID", "in": "in_ID",
    "is": "is_IS", "it": "it_IT", "iw": "iw_IL", "ja": "ja_JP",
    "kl": "kl_GL", "ko": "ko_KR", "lt": "lt_LT", "lv": "lv_LV",
    "mk": "mk_MK", "nl": "nl_NL", "no": "no_NO", "pl": "pl_PL",
    "pt": "pt_PT", "ro": "ro_RO", "ru": "ru_RU", "se": "se_NO",
    "sk": "sk_SK", "sl": "sl_SI", "sq": "sq_AL", "sr": "sr_CS",
    "su": "fi_FI", "sv": "sv_SE", "th": "th_TH", "tr": "tr_TR",
    "uk": "uk_UA", "vi": "vi_VN", "wa": "wa_BE", "zh": "zh_CN",

    // Named aliases.
    "big5": "zh_TW.Big5",
    "chinese": "zh_CN",
    "iso_8859_1": "en_US.ISO8859-1",
    "iso_8859_15": "en_US.ISO8859-15",
    "japanese": "ja_JP",
    "no_NY": "no_NO@nynorsk",
    "sr_SP": "sr_YU",
    "tchinese": "zh_TW"
];
270
271
/*
 * Non-Linux Posix table mapping a platform language string to the language
 * code reported by this module.
 */
enum string[string] languageNames = [
    "C": "en", "POSIX": "en", "cz": "cs", "he": "iw", "id": "in",
    "sh": "sr", // sh is deprecated
    "su": "fi",

    // Spelled-out language names.
    "chinese": "zh", "japanese": "ja", "korean": "ko"
];
289
/*
 * Non-Linux Posix table of recognised script codes; every code maps to itself.
 */
enum string[string] scriptNames = [
    "Arab": "Arab", "Cyrl": "Cyrl", "Deva": "Deva", "Ethi": "Ethi", "Hans": "Hans",
    "Hant": "Hant", "Latn": "Latn", "Sund": "Sund", "Syrc": "Syrc", "Tfng": "Tfng"
];
305
/*
 * Non-Linux Posix table mapping a platform country string to the country code
 * reported by this module.
 */
enum string[string] countryNames = [
    // YU has been removed from ISO 3166
    "YU": "CS"
];
312 }
313
/// Locale categories, each carrying the value of the matching `LC_*`
/// constant from `core.stdc.locale`.
enum LocaleCategory : int {
    /// Every category at once.
    ALL = LC_ALL,
    /// `LC_COLLATE`
    COLLATE = LC_COLLATE,
    /// `LC_CTYPE`
    CTYPE = LC_CTYPE,
    /// `LC_MONETARY`
    MONETARY = LC_MONETARY,
    /// `LC_NUMERIC`
    NUMERIC = LC_NUMERIC,
    /// `LC_TIME`
    TIME = LC_TIME,
    /// `LC_MESSAGES`
    MESSAGES = LC_MESSAGES
}
323
/*
 * Table mapping a locale modifier string to the variant name reported by this
 * module. Modifiers that are not listed here produce no variant.
 */
enum string[string] variantNames = ["nynorsk": "NY"];
330
331 // dfmt on
332
/**
 * A Posix locale broken down into language, country, encoding, variant and
 * script, obtained from the C library's locale facilities.
 *
 * See_Also:
 *     https://linux.die.net/man/3/setlocale
 */
class Locale {
    /// Language code after normalisation through `languageNames`.
    string language;
    /// Country code after normalisation through `countryNames`; may be empty.
    string country;
    /// Name of the character encoding.
    string encoding;
    /// Variant name taken from `variantNames`; empty when the modifier is not listed.
    string variant;
    /// Script name taken from `scriptNames`; empty when the modifier is not listed.
    string script;

    /// Renders all five components on one line, for logging.
    override string toString() {
        return format("language=%s, country=%s, encoding=%s, variant=%s, script=%s",
                language, country, encoding, variant, script);
    }

    /**
     * Calls `set(LocaleCategory.ALL)` with the default (empty) locale name and
     * then returns the locale in effect for the MESSAGES category.
     */
    static Locale getUserDefault() {
        string info = set(LocaleCategory.ALL);
        version (HUNT_DEBUG_MORE) {
            tracef("Locale(ALL):%s ", info);
        }
        return query(LocaleCategory.MESSAGES);
    }

    /**
     * Calls `set(LocaleCategory.ALL)` with the default (empty) locale name and
     * then returns the locale in effect for the CTYPE category.
     */
    static Locale getSystemDefault() {
        set(LocaleCategory.ALL);
        return query(LocaleCategory.CTYPE);
    }

    /// Same as `getUserDefault`.
    static Locale getUserUI() {
        return getUserDefault();
    }

    /**
     * Installs `locale` for category `cat` through `setlocale`.
     *
     * Params:
     *     cat    = category to change
     *     locale = locale name handed to `setlocale`; defaults to the empty name
     *
     * Returns: a private copy of the string reported by `setlocale`, or `null`
     *          when `setlocale` returns a null pointer.
     */
    static string set(LocaleCategory cat, string locale = "") {
        char* reported = setlocale(cast(int) cat, locale.toStringz());
        // The C library owns the returned buffer and may overwrite it on a
        // later call, so it must be copied rather than cast to `string`.
        return reported is null ? null : fromStringz(reported).idup;
    }

    /**
     * Reads, without changing it, the locale currently in effect for `cat`.
     *
     * Returns: the parsed locale, or `null` when `setlocale` returns a null
     *          pointer.
     */
    static Locale query(LocaleCategory cat) {
        char* current = setlocale(cast(int) cat, null);
        if (current is null) {
            return null;
        }

        // Copy first: `parse` keeps slices of its argument in the result, and
        // the buffer behind `current` belongs to the C library.
        string localInfo = fromStringz(current).idup;
        version (HUNT_DEBUG_MORE) tracef("category=%s, locale: %s", cat, localInfo);
        return parse(localInfo);
    }

    /**
     * Splits a locale name of the form
     * `<language>_<country>.<encoding>@<variant>` into a `Locale`.
     *
     * Country, encoding and variant are optional, but they must appear in
     * that order and be preceded by their delimiter. An empty name, "C" and
     * "POSIX" are treated as "en_US". If the name without its
     * `.encoding@variant` tail is a key of `localeAliases` it is replaced by
     * the alias target, which may supply a tail of its own.
     *
     * The encoding written in the name is only consulted to recognise
     * "ISO8859-15"; otherwise the encoding comes from `nl_langinfo(CODESET)`,
     * i.e. from the locale currently installed in the process.
     *
     * Params:
     *     localInfo = locale name, e.g. "zh_CN.UTF-8" or "no_NO@nynorsk"
     *
     * Returns: a new `Locale`; never `null`.
     */
    static Locale parse(string localInfo) {
        if (localInfo.empty || localInfo == "C" || localInfo == "POSIX") {
            localInfo = "en_US";
        }

        // Separate "language_country" from the ".encoding@variant" tail.
        string name = localInfo;
        string tail;
        ptrdiff_t index = tailStart(localInfo);
        if (index >= 0) {
            tail = localInfo[index .. $];
            name = localInfo[0 .. index];
        }

        // Expand aliases; an alias target may carry its own tail, which then
        // replaces the one found above.
        if (!name.empty) {
            if (auto target = name in localeAliases) {
                name = *target;
                index = tailStart(name);
                if (index >= 0) {
                    tail = name[index .. $];
                    name = name[0 .. index];
                }
            }
        }

        // language[_country]
        string language = name;
        string country;
        index = language.indexOf('_');
        if (index >= 0) {
            country = language[index + 1 .. $];
            language = language[0 .. index];
        }

        // [.encoding][@variant] -- the variant must also be recognised when
        // no encoding precedes it (e.g. "no_NO@nynorsk").
        string encoding;
        string variant;
        string rest = tail;
        if (rest.length > 0 && rest[0] == '.') {
            rest = rest[1 .. $];
            ptrdiff_t at = rest.indexOf('@');
            if (at >= 0) {
                encoding = rest[0 .. at];
                rest = rest[at .. $];
            } else {
                encoding = rest;
                rest = null;
            }
        }
        if (rest.length > 0 && rest[0] == '@') {
            variant = rest[1 .. $];
        }

        /* Normalize the language name */
        string stdLanguage;
        if (language.empty) {
            stdLanguage = "en";
        } else if (auto mapped = language in languageNames) {
            stdLanguage = *mapped;
        } else {
            stdLanguage = language;
        }

        /* Normalize the country name */
        string stdCountry;
        if (!country.empty) {
            if (auto mapped = country in countryNames) {
                stdCountry = *mapped;
            } else {
                stdCountry = country;
            }
        }

        /* Normalize the script and variant name. Only modifiers listed in
         * the mapping tables are used; others are ignored.
         */
        string stdScript;
        if (auto mapped = variant in scriptNames) {
            stdScript = *mapped;
        }

        string stdVariant;
        if (auto mapped = variant in variantNames) {
            stdVariant = *mapped;
        }

        /* Normalize the encoding name. The encoding parsed from the name is
         * ignored in favour of nl_langinfo(CODESET), except for ISO8859-15:
         * nl_langinfo() gives wrong answers on Euro locales in particular.
         */
        string codeset = encoding;
        if (codeset != "ISO8859-15") {
            // Copied because the C library owns the returned buffer.
            codeset = fromStringz(nl_langinfo(CODESET)).idup;
        }

        /* Convert the bare "646" used on Solaris to a proper IANA name */
        if (codeset == "646") {
            codeset = "ISO646-US";
        }

        /* Fall back to ISO8859-1 when no encoding is reported. */
        string stdEncoding = codeset.empty ? "ISO8859-1" : codeset;

        version (linux) {
            /*
             * Remap "EUC-JP" on Linux so that customized converters are used
             * instead of the default one; they omit the JIS0212 support that
             * the Linux variant of the encoding lacks.
             */
            if (codeset == "EUC-JP") {
                stdEncoding = "EUC-JP-LINUX";
            }
        } else {
            if (codeset == "eucJP") {
                /* Solaris: customized vendor-defined EUC-JP converter. */
                stdEncoding = "eucJP-open";
            } else if (codeset == "Big5" || codeset == "BIG5") {
                /*
                 * Big5_Solaris augments the default converter with seven
                 * additional ideographic characters.
                 */
                stdEncoding = "Big5_Solaris";
            } else if (codeset == "Big5-HKSCS") {
                /* Solaris uses HKSCS2001 */
                stdEncoding = "Big5-HKSCS-2001";
            }
        }

        version (OSX) {
            /*
             * On macOS, when the encoding is reported as US-ASCII and none of
             * LANG, LC_ALL or LC_CTYPE gives an encoding hint, use UTF-8
             * instead. ASCII content is still read correctly, and so is
             * UTF-8 content beyond the ASCII range.
             */
            string lang = environment.get("LANG", "");
            string lcall = environment.get("LC_ALL", "");
            string lctype = environment.get("LC_CTYPE", "");
            if (codeset == "US-ASCII" && lang.empty && lcall.empty && lctype.empty) {
                stdEncoding = "UTF-8";
            }
        }

        Locale locale = new Locale();
        locale.language = stdLanguage;
        locale.country = stdCountry;
        locale.encoding = stdEncoding;
        locale.variant = stdVariant;
        locale.script = stdScript;
        return locale;
    }

    /// Index of the first '.' in `name`, else of the first '@', else -1.
    private static ptrdiff_t tailStart(string name) {
        ptrdiff_t dot = name.indexOf('.');
        return dot >= 0 ? dot : name.indexOf('@');
    }
}
584
585 } else version(Windows) {
586
587 import core.sys.windows.winbase;
588 import core.sys.windows.w32api;
589 import core.sys.windows.winnls;
590 import core.sys.windows.winnt;
591 import core.stdc.stdio;
592 import core.stdc.stdlib;
593 import core.stdc.string;
594
595 // dfmt off
// LCTYPE selectors that are only declared when targeting _WIN32_WINNT 0x0600
// or later.
static if (_WIN32_WINNT >= 0x0600) {
    /// Locale name (ie: en-us)
    enum LCTYPE LOCALE_SNAME = 0x5C;
    /// Time duration format, eg "hh:mm:ss"
    enum LCTYPE LOCALE_SDURATION = 0x5D;

    /// Shortest day name for Monday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME1 = 0x60;
    /// Shortest day name for Tuesday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME2 = 0x61;
    /// Shortest day name for Wednesday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME3 = 0x62;
    /// Shortest day name for Thursday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME4 = 0x63;
    /// Shortest day name for Friday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME5 = 0x64;
    /// Shortest day name for Saturday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME6 = 0x65;
    /// Shortest day name for Sunday
    enum LCTYPE LOCALE_SSHORTESTDAYNAME7 = 0x66;

    /// 3 character ISO abbreviated language name, eg "eng"
    enum LCTYPE LOCALE_SISO639LANGNAME2 = 0x67;
    /// 3 character ISO country/region name, eg "USA"
    enum LCTYPE LOCALE_SISO3166CTRYNAME2 = 0x68;

    /// Not a Number, eg "NaN"
    enum LCTYPE LOCALE_SNAN = 0x69;
    /// + Infinity, eg "infinity"
    enum LCTYPE LOCALE_SPOSINFINITY = 0x6A;
    /// - Infinity, eg "-infinity"
    enum LCTYPE LOCALE_SNEGINFINITY = 0x6B;

    /// Typical scripts in the locale: ; delimited script codes, eg "Latn;"
    enum LCTYPE LOCALE_SSCRIPTS = 0x6C;
    /// Fallback name for resources, eg "en" for "en-US"
    enum LCTYPE LOCALE_SPARENT = 0x6D;
    /// Fallback name for within the console for Unicode Only locales, eg "en" for bn-IN
    enum LCTYPE LOCALE_SCONSOLEFALLBACKNAME = 0x6E;
}
618
619 // dfmt on
620
/**
 * A Windows locale broken down into language, country, encoding, variant and
 * script, obtained by querying a locale identifier (LCID) with
 * `GetLocaleInfoA`.
 */
class Locale {
    /// Language name reported for the LCID; "en" when none is reported.
    string language;
    /// Country/region name reported for the LCID; may be empty.
    string country;
    /// Encoding name derived from the LCID's default ANSI code page.
    string encoding;
    /// "NY" for Norwegian Nynorsk, otherwise empty.
    string variant;
    /// Four-letter script taken from the locale name, when it contains one.
    string script;

    /// Renders all five components on one line, for logging.
    override string toString() {
        return format("language=%s, country=%s, encoding=%s, variant=%s, script=%s",
                language, country, encoding, variant, script);
    }

    /// Locale described by `GetUserDefaultLCID`.
    static Locale getUserDefault() {
        LCID userDefaultLCID = GetUserDefaultLCID();
        return query(userDefaultLCID);
    }

    /// Locale described by `GetSystemDefaultLCID`.
    static Locale getSystemDefault() {
        LCID systemDefaultLCID = GetSystemDefaultLCID();
        return query(systemDefaultLCID);
    }

    /**
     * Locale of the user interface language.
     *
     * The Windows UI language list only distinguishes languages (it offers
     * "English", not "en_GB"). When the UI language and the user default
     * locale share the same primary language, the user default LCID is used
     * so that its region information is kept.
     */
    static Locale getUserUI() {
        LCID userDefaultLCID = GetUserDefaultLCID();
        LCID userDefaultUILang = GetUserDefaultUILanguage();
        if (PRIMARYLANGID(LANGIDFROMLCID(userDefaultLCID)) ==
                PRIMARYLANGID(LANGIDFROMLCID(userDefaultUILang))) {
            userDefaultUILang = userDefaultLCID;
        }
        return query(userDefaultUILang);
    }

    /**
     * Builds a `Locale` from the information Windows reports for `lcid`.
     *
     * Params:
     *     lcid = locale identifier to describe
     *
     * Returns: a new `Locale`; it defaults to en_US when no language name is
     *          reported.
     */
    static Locale query(LCID lcid) {
        Locale locale = new Locale();

        enum PROPSIZE = 9; // eight-letter + null terminator
        enum SNAMESIZE = 86; // max number of chars for LOCALE_SNAME is 85

        // Every buffer below is zero-filled explicitly: D default-initialises
        // `char` to 0xFF, which would leave a buffer without a terminator
        // whenever an API call fails before writing to it.

        static if (_WIN32_WINNT >= 0x0600) {
            /* script: an upper-case letter followed by lower-case letters,
             * found after the leading lower-case/hyphen part of the name */
            char[SNAMESIZE] localeName = '\0';
            char[PROPSIZE] scriptBuf = '\0';
            // Both conversions must succeed. sscanf may also return EOF (-1),
            // so the result is compared against the expected count instead of
            // 0. "%7[a-z]" keeps the write inside scriptBuf (1 + 7 + NUL).
            if (GetLocaleInfoA(lcid, LOCALE_SNAME, localeName.ptr, SNAMESIZE) == 0
                    || sscanf(localeName.ptr, "%*[a-z\\-]%1[A-Z]%7[a-z]",
                        scriptBuf.ptr, scriptBuf.ptr + 1) < 2) {
                scriptBuf[0] = '\0';
            }

            if (strlen(scriptBuf.ptr) == 4) {
                // Copy out of the stack buffer; a bare slice would dangle
                // once this function returns.
                locale.script = scriptBuf[0 .. 4].idup;
            }
        }

        /* country */
        char[PROPSIZE] countryBuf = '\0';
        if (GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, countryBuf.ptr, PROPSIZE) == 0) {
            static if (_WIN32_WINNT >= 0x0600) {
                if (GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME2, countryBuf.ptr, PROPSIZE) == 0) {
                    countryBuf[0] = '\0';
                }
            } else {
                countryBuf[0] = '\0';
            }
        }

        /* language */
        char[PROPSIZE] languageBuf = '\0';
        if (GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, languageBuf.ptr, PROPSIZE) == 0) {
            static if (_WIN32_WINNT >= 0x0600) {
                if (GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME2, languageBuf.ptr, PROPSIZE) == 0) {
                    languageBuf[0] = '\0';
                }
            } else {
                languageBuf[0] = '\0';
            }
        }

        size_t len = strlen(languageBuf.ptr);
        if (len == 0) {
            /* defaults to en_US */
            locale.language = "en";
            locale.country = "US";
        } else {
            locale.language = languageBuf[0 .. len].idup;
            len = strlen(countryBuf.ptr);
            if (len > 0) {
                locale.country = countryBuf[0 .. len].idup;
            }
        }

        /* variant: both Norwegian languages are reported as no_NO, with
         * Nynorsk told apart by the "NY" variant */
        if (locale.language == "nb") {
            locale.language = "no";
            locale.country = "NO";
        } else if (locale.language == "nn") {
            locale.language = "no";
            locale.country = "NO";
            locale.variant = "NY";
        }

        /* encoding */
        locale.encoding = getEncodingInternal(lcid);
        return locale;
    }

    /// Encoding name for a language identifier, using the default sort order.
    static string getEncodingFromLangID(LANGID langID) {
        return getEncodingInternal(MAKELCID(langID, SORT_DEFAULT));
    }

    /**
     * Derives an encoding name from the default ANSI code page of `lcid`.
     *
     * Code page 1252 is assumed when the query fails. Code page 0 yields
     * "UTF-8"; 874, 932, 949, 950 and 1361 yield "MS<n>"; 936 yields "GBK";
     * 54936 yields "GB18030"; every other code page yields "Cp<n>". "MS950"
     * and "GBK" may then be upgraded, see the comments in the body.
     */
    private static string getEncodingInternal(LCID lcid) {
        char[16] codePageBuf = '\0';
        int codepage;
        string digits;
        if (GetLocaleInfoA(lcid, LOCALE_IDEFAULTANSICODEPAGE, codePageBuf.ptr, 14) == 0) {
            // Nothing usable was written to the buffer; name the fallback
            // code page directly instead of reading the buffer.
            codepage = 1252;
            digits = "1252";
        } else {
            codepage = atoi(codePageBuf.ptr);
            digits = codePageBuf[0 .. strlen(codePageBuf.ptr)].idup;
        }

        string encoding;
        switch (codepage) {
        case 0:
            encoding = "UTF-8";
            break;
        case 874: /* 9:Thai */
        case 932: /* 10:Japanese */
        case 949: /* 12:Korean Extended Wansung */
        case 950: /* 13:Chinese (Taiwan, Hongkong, Macau) */
        case 1361: /* 15:Korean Johab */
            encoding = "MS" ~ digits;
            break;
        case 936:
            encoding = "GBK";
            break;
        case 54936:
            encoding = "GB18030";
            break;
        default:
            encoding = "Cp" ~ digits;
            break;
        }

        if (encoding == "MS950") {
            // Traditional Chinese Windows should use MS950_HKSCS_XP if the
            // HKSCS patch has been installed:
            //   "old" MS950 0xfa41 -> u+e001
            //   "new" MS950 0xfa41 -> u+92db
            CHAR[2] mbChar = [cast(char) 0xfa, cast(char) 0x41];
            WCHAR unicodeChar;
            MultiByteToWideChar(CP_ACP, 0, mbChar.ptr, 2, &unicodeChar, 1);
            if (unicodeChar == 0x92db) {
                encoding = "MS950_HKSCS_XP";
            }
        } else if (encoding == "GBK" && IsValidCodePage(54936)) {
            // Simplified Chinese Windows should use GB18030 if the gb18030
            // patch has been installed: code page 54936 is then available and
            // SimSun18030.ttc exists under the system fonts directory.
            char[MAX_PATH + 1] systemPath = '\0';
            enum string gb18030Font = "\\FONTS\\SimSun18030.ttc";

            // The return value is the number of characters written, or the
            // required size when the buffer is too small; the bound check
            // below rejects the latter before anything is appended.
            auto dirLen = GetWindowsDirectoryA(systemPath.ptr, MAX_PATH + 1);
            if (dirLen != 0 && dirLen + gb18030Font.length < MAX_PATH + 1) {
                strcat(systemPath.ptr, gb18030Font.ptr);
                FILE* f = fopen(systemPath.ptr, "r");
                if (f !is null) {
                    fclose(f);
                    encoding = "GB18030";
                }
            }
        }

        return encoding;
    }
}
827
828 } else {
829 static assert(false, "Unsupported OS");
830 }