module hunt.system.Locale;

import hunt.Functions;
import hunt.logging;

import core.stdc.locale;
import std.format;
import std.process;
import std.string;

// dfmt off
version(Posix) {

    enum _NL_CTYPE_CODESET_NAME = 14;
    alias CODESET = _NL_CTYPE_CODESET_NAME;

    extern(C) pure nothrow @nogc {
        char * nl_langinfo (int __item);
    }

version(linux) {
    /*
     * Mappings from partial locale names to full locale names
     */
    enum string[string] localeAliases = [
        "ar" : "ar_EG",
        "be" : "be_BY",
        "bg" : "bg_BG",
        "br" : "br_FR",
        "ca" : "ca_ES",
        "cs" : "cs_CZ",
        "cz" : "cs_CZ",
        "da" : "da_DK",
        "de" : "de_DE",
        "el" : "el_GR",
        "en" : "en_US",
        "eo" : "eo",    /* no country for Esperanto */
        "es" : "es_ES",
        "et" : "et_EE",
        "eu" : "eu_ES",
        "fi" : "fi_FI",
        "fr" : "fr_FR",
        "ga" : "ga_IE",
        "gl" : "gl_ES",
        "he" : "iw_IL",
        "hr" : "hr_HR",

        "hs" : "en_US", // used on Linux: not clear what it stands for

        "hu" : "hu_HU",
        "id" : "in_ID",
        "in" : "in_ID",
        "is" : "is_IS",
        "it" : "it_IT",
        "iw" : "iw_IL",
        "ja" : "ja_JP",
        "kl" : "kl_GL",
        "ko" : "ko_KR",
        "lt" : "lt_LT",
        "lv" : "lv_LV",
        "mk" : "mk_MK",
        "nl" : "nl_NL",
        "no" : "no_NO",
        "pl" : "pl_PL",
        "pt" : "pt_PT",
        "ro" : "ro_RO",
        "ru" : "ru_RU",
        "se" : "se_NO",
        "sk" : "sk_SK",
        "sl" : "sl_SI",
        "sq" : "sq_AL",
        "sr" : "sr_CS",
        "su" : "fi_FI",
        "sv" : "sv_SE",
        "th" : "th_TH",
        "tr" : "tr_TR",

        "ua" : "en_US", // used on Linux: not clear what it stands for

        "uk" : "uk_UA",
        "vi" : "vi_VN",
        "wa" : "wa_BE",
        "zh" : "zh_CN",

        "bokmal" : "nb_NO",
        "bokm\xE5l" : "nb_NO",
        "catalan" : "ca_ES",
        "croatian" : "hr_HR",
        "czech" : "cs_CZ",
        "danish" : "da_DK",
        "dansk" : "da_DK",
        "deutsch" : "de_DE",
        "dutch" : "nl_NL",
        "eesti" : "et_EE",
        "estonian" : "et_EE",
        "finnish" : "fi_FI",
        "fran\xE7\x61is" : "fr_FR",
        "french" : "fr_FR",
        "galego" : "gl_ES",
        "galician" : "gl_ES",
        "german" : "de_DE",
        "greek" : "el_GR",
        "hebrew" : "iw_IL",
        "hrvatski" : "hr_HR",
        "hungarian" : "hu_HU",
        "icelandic" : "is_IS",
        "italian" : "it_IT",
        "japanese" : "ja_JP",
        "korean" : "ko_KR",
        "lithuanian" : "lt_LT",
        "norwegian" : "no_NO",
        "nynorsk" : "nn_NO",
        "polish" : "pl_PL",
        "portuguese" : "pt_PT",
        "romanian" : "ro_RO",
        "russian" : "ru_RU",
        "slovak" : "sk_SK",
        "slovene" : "sl_SI",
        "slovenian" : "sl_SI",
        "spanish" : "es_ES",
        "swedish" : "sv_SE",
        "thai" : "th_TH",
        "turkish" : "tr_TR"
    ];


    /*
     * Linux/Solaris language string to ISO639 string mapping table.
     */
    enum string[string] languageNames = [
        "C" : "en",
        "POSIX" : "en",
        "cz" : "cs",
        "he" : "iw",

        "hs" : "en",  // used on Linux : not clear what it stands for

        "id" : "in",
        "sh" : "sr",  // sh is deprecated
        "su" : "fi",

        "ua" : "en",  // used on Linux : not clear what it stands for

        "catalan" : "ca",
        "croatian" : "hr",
        "czech" : "cs",
        "danish" : "da",
        "dansk" : "da",
        "deutsch" : "de",
        "dutch" : "nl",
        "finnish" : "fi",
        "fran\xE7\x61is" : "fr",
        "french" : "fr",
        "german" : "de",
        "greek" : "el",
        "hebrew" : "he",
        "hrvatski" : "hr",
        "hungarian" : "hu",
        "icelandic" : "is",
        "italian" : "it",
        "japanese" : "ja",
        "norwegian" : "no",
        "polish" : "pl",
        "portuguese" : "pt",
        "romanian" : "ro",
        "russian" : "ru",
        "slovak" : "sk",
        "slovene" : "sl",
        "slovenian" : "sl",
        "spanish" : "es",
        "swedish" : "sv",
        "turkish" : "tr"
    ];

    /*
     * Linux/Solaris script string to Java script name mapping table.
     */
    enum string[string] scriptNames = [
        "cyrillic" : "Cyrl",
        "devanagari" : "Deva",
        "iqtelif" : "Latn",
        "latin" : "Latn",
        "Arab" : "Arab",
        "Cyrl" : "Cyrl",
        "Deva" : "Deva",
        "Ethi" : "Ethi",
        "Hans" : "Hans",
        "Hant" : "Hant",
        "Latn" : "Latn",
        "Sund" : "Sund",
        "Syrc" : "Syrc",
        "Tfng" : "Tfng"
    ];

    /*
     * Linux/Solaris country string to ISO3166 string mapping table.
     */
    enum string[string] countryNames = [
        "RN" : "US", // used on Linux : not clear what it stands for
        "YU" : "CS"  // YU has been removed from ISO 3166
    ];

} else {

    /*
     * Mappings from partial locale names (and legacy aliases) to full
     * locale names for non-Linux Posix systems.
     */
    enum string[string] localeAliases = [
        "ar" : "ar_EG",
        "be" : "be_BY",
        "bg" : "bg_BG",
        "br" : "br_FR",
        "ca" : "ca_ES",
        "cs" : "cs_CZ",
        "cz" : "cs_CZ",
        "da" : "da_DK",
        "de" : "de_DE",
        "el" : "el_GR",
        "en" : "en_US",
        "eo" : "eo",    /* no country for Esperanto */
        "es" : "es_ES",
        "et" : "et_EE",
        "eu" : "eu_ES",
        "fi" : "fi_FI",
        "fr" : "fr_FR",
        "ga" : "ga_IE",
        "gl" : "gl_ES",
        "he" : "iw_IL",
        "hr" : "hr_HR",

        "hu" : "hu_HU",
        "id" : "in_ID",
        "in" : "in_ID",
        "is" : "is_IS",
        "it" : "it_IT",
        "iw" : "iw_IL",
        "ja" : "ja_JP",
        "kl" : "kl_GL",
        "ko" : "ko_KR",
        "lt" : "lt_LT",
        "lv" : "lv_LV",
        "mk" : "mk_MK",
        "nl" : "nl_NL",
        "no" : "no_NO",
        "pl" : "pl_PL",
        "pt" : "pt_PT",
        "ro" : "ro_RO",
        "ru" : "ru_RU",
        "se" : "se_NO",
        "sk" : "sk_SK",
        "sl" : "sl_SI",
        "sq" : "sq_AL",
        "sr" : "sr_CS",
        "su" : "fi_FI",
        "sv" : "sv_SE",
        "th" : "th_TH",
        "tr" : "tr_TR",

        "uk" : "uk_UA",
        "vi" : "vi_VN",
        "wa" : "wa_BE",
        "zh" : "zh_CN",

        "big5" : "zh_TW.Big5",
        "chinese" : "zh_CN",
        "iso_8859_1" : "en_US.ISO8859-1",
        "iso_8859_15" : "en_US.ISO8859-15",
        "japanese" : "ja_JP",
        "no_NY" : "no_NO@nynorsk",
        "sr_SP" : "sr_YU",
        "tchinese" : "zh_TW"
    ];


    /*
     * Linux/Solaris language string to ISO639 string mapping table.
     */
    enum string[string] languageNames = [
        "C" : "en",
        "POSIX" : "en",
        "cz" : "cs",
        "he" : "iw",

        "id" : "in",
        "sh" : "sr",  // sh is deprecated
        "su" : "fi",

        "chinese" : "zh",
        "japanese" : "ja",
        "korean" : "ko"
    ];

    /*
     * Linux/Solaris script string to Java script name mapping table.
     */
    enum string[string] scriptNames = [
        "Arab" : "Arab",
        "Cyrl" : "Cyrl",
        "Deva" : "Deva",
        "Ethi" : "Ethi",
        "Hans" : "Hans",
        "Hant" : "Hant",
        "Latn" : "Latn",
        "Sund" : "Sund",
        "Syrc" : "Syrc",
        "Tfng" : "Tfng"
    ];

    /*
     * Linux/Solaris country string to ISO3166 string mapping table.
     */
    enum string[string] countryNames = [
        "YU" : "CS"  // YU has been removed from ISO 3166
    ];
}

    /// Locale categories accepted by setlocale(), mirroring the C LC_* constants.
    enum LocaleCategory {
        ALL = LC_ALL,
        COLLATE = LC_COLLATE,
        CTYPE = LC_CTYPE,
        MONETARY = LC_MONETARY,
        NUMERIC = LC_NUMERIC,
        TIME = LC_TIME,
        MESSAGES = LC_MESSAGES
    }

    /*
     * Linux/Solaris variant string to Java variant name mapping table.
     */
    enum string[string] variantNames = [
        "nynorsk" : "NY",
    ];

// dfmt on

    /**
     * Description of a locale (language, country, encoding, variant, script)
     * obtained from the C runtime via setlocale() / nl_langinfo().
     *
     * see_also:
     *     https://linux.die.net/man/3/setlocale
     */
    class Locale {
        string language;
        string country;
        string encoding;
        string variant;
        string script;

        override string toString() {
            return format("language=%s, country=%s, encoding=%s, variant=%s, script=%s",
                language, country, encoding, variant, script);
        }

        /**
         * Switches the process locale to the environment's default
         * (setlocale(LC_ALL, "")) and returns the locale reported for the
         * MESSAGES category. Returns null if that category cannot be queried.
         */
        static Locale getUserDefault() {
            string info = set(LocaleCategory.ALL);
            version (HUNT_DEBUG_MORE) {
                tracef("Locale(ALL):%s ", info);
            }
            return query(LocaleCategory.MESSAGES);
        }

        /**
         * Switches the process locale to the environment's default
         * (setlocale(LC_ALL, "")) and returns the locale reported for the
         * CTYPE category. Returns null if that category cannot be queried.
         */
        static Locale getSystemDefault() {
            set(LocaleCategory.ALL);
            return query(LocaleCategory.CTYPE);
        }

        /// Same as getUserDefault() on Posix systems.
        static Locale getUserUI() {
            return getUserDefault();
        }

        /**
         * Calls setlocale() for the given category.
         *
         * Params:
         *     cat    = the locale category to change
         *     locale = the locale name; "" selects the environment's default
         *
         * Returns: a copy of the locale string reported by setlocale(), or
         *          an empty string when setlocale() returns null.
         */
        static string set(LocaleCategory cat, string locale="") {
            char* p = setlocale(cast(int)cat, locale.toStringz());
            // setlocale() hands back a pointer into storage owned by the C
            // runtime that later calls may overwrite, so copy it.
            return fromStringz(p).idup;
        }

        /**
         * Reads the current locale name of the given category without
         * changing it and parses it.
         *
         * Returns: the parsed Locale, or null if setlocale() returns null.
         */
        static Locale query(LocaleCategory cat) {
            char* lc = setlocale(cast(int)cat, null);
            if(lc is null) {
                return null;
            }

            // Copy: the fields of the resulting Locale are slices of this
            // string and must not alias the C runtime's buffer.
            string localInfo = fromStringz(lc).idup;
            version(HUNT_DEBUG_MORE) tracef("category=%s, locale: %s", cat, localInfo);
            return parse(localInfo);
        }

        /**
         * Parses a locale name of the form
         * <language>_<country>.<encoding>@<variant>.
         * <country>, <encoding> and <variant> are optional, but must occur
         * in that order and be preceded by their delimiter.
         *
         * Empty, "C" and "POSIX" names are treated as "en_US". If the name
         * without its .encoding@variant suffix matches an entry of
         * localeAliases, it is replaced by the mapped full name (this maps
         * bare languages to a default country and legacy aliases to proper
         * locale IDs).
         *
         * The encoding field of the result is taken from
         * nl_langinfo(CODESET) rather than from the name, except for
         * "ISO8859-15".
         *
         * Params:
         *     localInfo = the locale name to parse
         *
         * Returns: a new Locale with normalized fields.
         */
        static Locale parse(string localInfo) {
            // localInfo = "zh_CN.UTF-8@nynorsk"; // for test
            if(localInfo.empty || localInfo == "C" || localInfo == "POSIX") {
                localInfo = "en_US";
            }

            /* Split off the ".encoding@variant" suffix, keeping its
             * leading delimiter. */
            string name = localInfo;
            string encoding_variant;
            ptrdiff_t index = localInfo.indexOf('.');
            if(index == -1) {
                index = localInfo.indexOf('@');
            }
            if(index >= 0) {
                encoding_variant = localInfo[index .. $];
                name = localInfo[0 .. index];
            }

            string language = name;
            if(!name.empty) {
                if(auto mapped = name in localeAliases) {
                    language = *mapped;
                    // check the "encoding_variant" again, if any.
                    index = language.indexOf('.');
                    if(index == -1) {
                        index = language.indexOf('@');
                    }
                    if(index >= 0) {
                        encoding_variant = language[index .. $];
                        language = language[0 .. index];
                    }
                }
            }

            string country;
            index = language.indexOf('_');
            if(index >= 0) {
                country = language[index+1 .. $];
                language = language[0 .. index];
            }

            /* Split the suffix into encoding and variant. The variant must
             * also be found when no encoding is present (e.g.
             * "no_NO@nynorsk"), so '@' is searched in whatever follows the
             * '.', or in the whole suffix if there is no '.'. */
            string encoding;
            string variant;
            ptrdiff_t dot = encoding_variant.indexOf('.');
            string rest = dot >= 0 ? encoding_variant[dot+1 .. $] : encoding_variant;
            ptrdiff_t at = rest.indexOf('@');
            if(at >= 0) {
                variant = rest[at+1 .. $];
                rest = rest[0 .. at];
            }
            if(dot >= 0) {
                encoding = rest;
            }

            // version(HUNT_DEBUG) {
            //     tracef("language=%s, country=%s, variant=%s, encoding=%s",
            //         language, country, variant, encoding);
            // }

            Locale locale = new Locale();

            /* Normalize the language name */
            if(language.empty) {
                locale.language = "en";
            } else if(auto std = language in languageNames) {
                locale.language = *std;
            } else {
                locale.language = language;
            }

            /* Normalize the country name */
            if(!country.empty) {
                if(auto std = country in countryNames) {
                    locale.country = *std;
                } else {
                    locale.country = country;
                }
            }

            /* Normalize the script and variant name. Note that we only use
             * variants listed in the mapping array; others are ignored.
             */
            if(auto std = variant in scriptNames) {
                locale.script = *std;
            }
            if(auto std = variant in variantNames) {
                locale.variant = *std;
            }

            locale.encoding = resolveEncoding(encoding);
            return locale;
        }

        /*
         * Determines the normalized encoding name.
         *
         * Note that the string 'encoding' extracted from the locale name is
         * IGNORED unless it is "ISO8859-15". Instead nl_langinfo(CODESET) is
         * used; it returns an empty string if no encoding is set for the
         * current locale (e.g., the C or POSIX locales), in which case
         * ISO8859-1 is used. nl_langinfo() gives wrong answers on Euro
         * locales, hence the ISO8859-15 exception.
         */
        private static string resolveEncoding(string encoding) {
            string p = encoding;
            if (p != "ISO8859-15") {
                // The returned buffer is owned by the C runtime and may be
                // overwritten by later calls, so copy it.
                p = fromStringz(nl_langinfo(CODESET)).idup;
            }

            /* Convert the bare "646" used on Solaris to a proper IANA name */
            if (p == "646")
                p = "ISO646-US";

            /* return same result nl_langinfo would return for en_UK,
             * in order to use optimizations. */
            string result = p.empty ? "ISO8859-1" : p;

            version(linux) {
                /*
                 * Remap the encoding string to a different value for japanese
                 * locales on linux so that customized converters are used instead
                 * of the default converter for "EUC-JP". The customized converters
                 * omit support for the JIS0212 encoding which is not supported by
                 * the variant of "EUC-JP" encoding used on linux
                 */
                if (p == "EUC-JP")
                    result = "EUC-JP-LINUX";
            } else {
                if (p == "eucJP") {
                    /* For Solaris use customized vendor defined character
                     * customized EUC-JP converter
                     */
                    result = "eucJP-open";
                } else if (p == "Big5" || p == "BIG5") {
                    /*
                     * Remap the encoding string to Big5_Solaris which augments
                     * the default converter for Solaris Big5 locales to include
                     * seven additional ideographic characters beyond those included
                     * in the Java "Big5" converter.
                     */
                    result = "Big5_Solaris";
                } else if (p == "Big5-HKSCS") {
                    /*
                     * Solaris uses HKSCS2001
                     */
                    result = "Big5-HKSCS-2001";
                }
            }

            version(OSX) {
                /*
                 * For the case on MacOS X where encoding is set to US-ASCII, but we
                 * don't have any encoding hints from LANG/LC_ALL/LC_CTYPE, use UTF-8
                 * instead.
                 *
                 * The contents of ASCII files will still be read and displayed
                 * correctly, but so will files containing UTF-8 characters beyond the
                 * standard ASCII range.
                 *
                 * Specifically, this allows apps launched by double-clicking a .jar
                 * file to correctly read UTF-8 files using the default encoding (see
                 * 8011194).
                 */
                string lang = environment.get("LANG", "");
                string lcall = environment.get("LC_ALL", "");
                string lctype = environment.get("LC_CTYPE", "");
                if (p == "US-ASCII" && lang.empty &&
                        lcall.empty && lctype.empty) {
                    result = "UTF-8";
                }
            }

            return result;
        }
    }

} else version(Windows) {

    import core.sys.windows.winbase;
    import core.sys.windows.w32api;
    import core.sys.windows.winnls;
    import core.sys.windows.winnt;
    import core.stdc.stdio;
    import core.stdc.stdlib;
    import core.stdc.string;

// dfmt off
    static if (_WIN32_WINNT >= 0x0600) {
        enum : LCTYPE {
            LOCALE_SNAME                  = 0x0000005c,   // locale name (ie: en-us)
            LOCALE_SDURATION              = 0x0000005d,   // time duration format, eg "hh:mm:ss"
            LOCALE_SSHORTESTDAYNAME1      = 0x00000060,   // Shortest day name for Monday
            LOCALE_SSHORTESTDAYNAME2      = 0x00000061,   // Shortest day name for Tuesday
            LOCALE_SSHORTESTDAYNAME3      = 0x00000062,   // Shortest day name for Wednesday
            LOCALE_SSHORTESTDAYNAME4      = 0x00000063,   // Shortest day name for Thursday
            LOCALE_SSHORTESTDAYNAME5      = 0x00000064,   // Shortest day name for Friday
            LOCALE_SSHORTESTDAYNAME6      = 0x00000065,   // Shortest day name for Saturday
            LOCALE_SSHORTESTDAYNAME7      = 0x00000066,   // Shortest day name for Sunday
            LOCALE_SISO639LANGNAME2       = 0x00000067,   // 3 character ISO abbreviated language name, eg "eng"
            LOCALE_SISO3166CTRYNAME2      = 0x00000068,   // 3 character ISO country/region name, eg "USA"
            LOCALE_SNAN                   = 0x00000069,   // Not a Number, eg "NaN"
            LOCALE_SPOSINFINITY           = 0x0000006a,   // + Infinity, eg "infinity"
            LOCALE_SNEGINFINITY           = 0x0000006b,   // - Infinity, eg "-infinity"
            LOCALE_SSCRIPTS               = 0x0000006c,   // Typical scripts in the locale: ; delimited script codes, eg "Latn;"
            LOCALE_SPARENT                = 0x0000006d,   // Fallback name for resources, eg "en" for "en-US"
            LOCALE_SCONSOLEFALLBACKNAME   = 0x0000006e    // Fallback name for within the console for Unicode Only locales, eg "en" for bn-IN
        }

    }

// dfmt on

    /**
     * Description of a locale (language, country, encoding, variant, script)
     * obtained from the Win32 NLS API (GetLocaleInfoA and the
     * GetUserDefaultLCID / GetSystemDefaultLCID / GetUserDefaultUILanguage
     * family).
     */
    class Locale {
        string language;
        string country;
        string encoding;
        string variant;
        string script;

        override string toString() {
            return format("language=%s, country=%s, encoding=%s, variant=%s, script=%s",
                language, country, encoding, variant, script);
        }

        /// Returns the locale described by the current user default LCID.
        static Locale getUserDefault() {
            /*
             * query the system for the current system default locale
             * (which is a Windows LCID value),
             */
            LCID userDefaultLCID = GetUserDefaultLCID();
            return query(userDefaultLCID);
        }

        /// Returns the locale described by the system default LCID.
        static Locale getSystemDefault() {
            LCID systemDefaultLCID = GetSystemDefaultLCID();
            return query(systemDefaultLCID);
        }

        /// Returns the locale of the user's UI language.
        static Locale getUserUI() {
            LCID userDefaultLCID = GetUserDefaultLCID();
            LCID userDefaultUILang = GetUserDefaultUILanguage();
            // Windows UI Language selection list only cares "language"
            // information of the UI Language. For example, the list
            // just lists "English" but it actually means "en_US", and
            // the user cannot select "en_GB" (if exists) in the list.
            // So, this hack is to use the user LCID region information
            // for the UI Language, if the "language" portion of those
            // two locales are the same.
            if (PRIMARYLANGID(LANGIDFROMLCID(userDefaultLCID)) ==
                    PRIMARYLANGID(LANGIDFROMLCID(userDefaultUILang))) {
                userDefaultUILang = userDefaultLCID;
            }
            return query(userDefaultUILang);
        }

        /**
         * Builds a Locale from a Windows LCID.
         *
         * Falls back to language "en" / country "US" when no language name
         * can be obtained. Norwegian "nb"/"nn" are reported as "no"/"NO",
         * with variant "NY" for "nn".
         */
        static Locale query(LCID lcid) {
            Locale locale = new Locale();

            enum PROPSIZE = 9;      // eight-letter + null terminator
            enum SNAMESIZE = 86;    // max number of chars for LOCALE_SNAME is 85

            static if (_WIN32_WINNT >= 0x0600) {
                /* script */
                // D initializes char arrays to 0xFF, not 0; zero them so
                // they are always NUL-terminated for strlen().
                char[SNAMESIZE] name = '\0';
                char[PROPSIZE] script = '\0';
                if (GetLocaleInfoA(lcid, LOCALE_SNAME, name.ptr, SNAMESIZE) != 0) {
                    // One upper-case letter followed by lower-case letters;
                    // the width keeps the result within PROPSIZE. A return
                    // value below 1 (0 or EOF) means nothing was stored.
                    if (sscanf(name.ptr, "%*[a-z\\-]%1[A-Z]%7[a-z]",
                            script.ptr, script.ptr + 1) < 1) {
                        script[0] = '\0';
                    }
                }

                // writefln("script=[%s]", script);
                if (strlen(script.ptr) == 4) {
                    // Copy: 'script' is a stack buffer.
                    locale.script = script[0 .. 4].idup;
                }
            }

            /* country */
            char[PROPSIZE] country = '\0';
            if (GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, country.ptr, PROPSIZE) == 0 ) {
                static if (_WIN32_WINNT >= 0x0600) {
                    if(GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME2, country.ptr, PROPSIZE) == 0) {
                        country[0] = '\0';
                    }
                } else {
                    country[0] = '\0';
                }
            }

            /* language */
            char[PROPSIZE] language = '\0';
            if (GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, language.ptr, PROPSIZE) == 0) {
                static if (_WIN32_WINNT >= 0x0600) {
                    if(GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME2, language.ptr, PROPSIZE) == 0) {
                        language[0] = '\0';
                    }
                } else {
                    language[0] = '\0';
                }
            }

            size_t languageLen = strlen(language.ptr);
            if(languageLen == 0) {
                /* defaults to en_US */
                locale.language = "en";
                locale.country = "US";
            } else {
                locale.language = language[0 .. languageLen].idup;
                size_t countryLen = strlen(country.ptr);
                if(countryLen > 0)
                    locale.country = country[0 .. countryLen].idup;
            }
            // writefln("language=[%s], %s", language, locale.language);
            // writefln("country=[%s], %s", country, locale.country);

            /* variant */

            /* handling for Norwegian */
            if (locale.language == "nb") {
                locale.language = "no";
                locale.country = "NO";
            } else if (locale.language == "nn") {
                locale.language = "no";
                locale.country = "NO";
                locale.variant = "NY";
            }

            /* encoding */
            locale.encoding = getEncodingInternal(lcid);
            return locale;
        }

        /// Returns the encoding name for the default-sort LCID of langID.
        static string getEncodingFromLangID(LANGID langID) {
            return getEncodingInternal(MAKELCID(langID, SORT_DEFAULT));
        }

        /*
         * Maps the default ANSI code page of the given LCID to an encoding
         * name ("UTF-8", "MS<cp>", "GBK", "GB18030" or "Cp<cp>"). Code page
         * 1252 is assumed when the code page cannot be obtained.
         */
        private static string getEncodingInternal(LCID lcid) {
            string encoding;
            int codepage;
            string digits;          // textual code page used in the name
            char[16] ret = '\0';    // zeroed: D would fill it with 0xFF
            if (GetLocaleInfoA(lcid, LOCALE_IDEFAULTANSICODEPAGE,
                    ret.ptr, 14) == 0) {
                codepage = 1252;
                digits = "1252";
            } else {
                codepage = atoi(ret.ptr);
                digits = fromStringz(ret.ptr).idup;
            }
            // import std.stdio;
            // writefln("codepage=%d, ret: [%(%02X %)]", codepage, cast(ubyte[])ret);

            switch (codepage) {
            case 0:
                encoding = "UTF-8";
                break;
            case 874:     /*  9:Thai     */
            case 932:     /* 10:Japanese */
            case 949:     /* 12:Korean Extended Wansung */
            case 950:     /* 13:Chinese (Taiwan, Hongkong, Macau) */
            case 1361:    /* 15:Korean Johab */
                encoding = "MS" ~ digits;
                break;
            case 936:
                encoding = "GBK";
                break;
            case 54936:
                encoding = "GB18030";
                break;
            default:
                encoding = "Cp" ~ digits;
                break;
            }

            //Traditional Chinese Windows should use MS950_HKSCS_XP as the
            //default encoding, if HKSCS patch has been installed.
            // "old" MS950 0xfa41 -> u+e001
            // "new" MS950 0xfa41 -> u+92db
            if (encoding == "MS950") {
                CHAR[2] mbChar = [cast(char)0xfa, cast(char)0x41];
                WCHAR unicodeChar;
                MultiByteToWideChar(CP_ACP, 0, mbChar.ptr, 2, &unicodeChar, 1);
                if (unicodeChar == 0x92db) {
                    encoding = "MS950_HKSCS_XP";
                }
            } else {
                //SimpChinese Windows should use GB18030 as the default
                //encoding, if gb18030 patch has been installed (on windows
                //2000/XP, (1)Codepage 54936 will be available
                //(2)simsun18030.ttc will exist under system fonts dir )
                if (encoding == "GBK" && IsValidCodePage(54936)) {
                    char[MAX_PATH + 1] systemPath = '\0';
                    enum string gb18030Font = "\\FONTS\\SimSun18030.ttc";

                    if (GetWindowsDirectoryA(systemPath.ptr, MAX_PATH + 1) != 0 &&
                            strlen(systemPath.ptr) + gb18030Font.length < MAX_PATH + 1) {
                        strcat(systemPath.ptr, gb18030Font);
                        // Only probing for existence; close right away.
                        FILE* f = fopen(systemPath.ptr, "r");
                        if (f !is null) {
                            fclose(f);
                            encoding = "GB18030";
                        }
                    }
                }
            }

            return encoding;
        }
    }

} else {
    static assert(false, "Unsupported OS");
}