1 module hunt.system.Locale;
2 
3 import hunt.Functions;
4 import hunt.logging;
5 
6 import core.stdc.locale;
7 import std.format;
8 import std.process;
9 import std.string;
10 
11 // dfmt off
12 version(Posix) {
13 
14     enum _NL_CTYPE_CODESET_NAME = 14;
15     alias CODESET = _NL_CTYPE_CODESET_NAME;
16 
17     extern(C) pure nothrow @nogc {
18         char * nl_langinfo (int __item);
19     }
20 
21 version(linux) {
22     /*
23      * Mappings from partial locale names to full locale names
24      */    
25 	enum string[string] localeAliases = [
26 		"ar" : "ar_EG",
27 		"be" : "be_BY",
28 		"bg" : "bg_BG",
29 		"br" : "br_FR",
30 		"ca" : "ca_ES",
31 		"cs" : "cs_CZ",
32 		"cz" : "cs_CZ",
33 		"da" : "da_DK",
34 		"de" : "de_DE",
35 		"el" : "el_GR",
36 		"en" : "en_US",
37 		"eo" : "eo",    /* no country for Esperanto */
38 		"es" : "es_ES",
39 		"et" : "et_EE",
40 		"eu" : "eu_ES",
41 		"fi" : "fi_FI",
42 		"fr" : "fr_FR",
43 		"ga" : "ga_IE",
44 		"gl" : "gl_ES",
45 		"he" : "iw_IL",
46 		"hr" : "hr_HR",
47 
48 		"hs" : "en_US", // used on Linux: not clear what it stands for
49 
50 		"hu" : "hu_HU",
51 		"id" : "in_ID",
52 		"in" : "in_ID",
53 		"is" : "is_IS",
54 		"it" : "it_IT",
55 		"iw" : "iw_IL",
56 		"ja" : "ja_JP",
57 		"kl" : "kl_GL",
58 		"ko" : "ko_KR",
59 		"lt" : "lt_LT",
60 		"lv" : "lv_LV",
61 		"mk" : "mk_MK",
62 		"nl" : "nl_NL",
63 		"no" : "no_NO",
64 		"pl" : "pl_PL",
65 		"pt" : "pt_PT",
66 		"ro" : "ro_RO",
67 		"ru" : "ru_RU",
68 		"se" : "se_NO",
69 		"sk" : "sk_SK",
70 		"sl" : "sl_SI",
71 		"sq" : "sq_AL",
72 		"sr" : "sr_CS",
73 		"su" : "fi_FI",
74 		"sv" : "sv_SE",
75 		"th" : "th_TH",
76 		"tr" : "tr_TR",
77 
78 		"ua" : "en_US", // used on Linux: not clear what it stands for
79 
80 		"uk" : "uk_UA",
81 		"vi" : "vi_VN",
82 		"wa" : "wa_BE",
83 		"zh" : "zh_CN",
84 
85 		"bokmal" : "nb_NO",
86 		"bokm\xE5l" : "nb_NO",
87 		"catalan" : "ca_ES",
88 		"croatian" : "hr_HR",
89 		"czech" : "cs_CZ",
90 		"danish" : "da_DK",
91 		"dansk" : "da_DK",
92 		"deutsch" : "de_DE",
93 		"dutch" : "nl_NL",
94 		"eesti" : "et_EE",
95 		"estonian" : "et_EE",
96 		"finnish" : "fi_FI",
97 		"fran\xE7\x61is" : "fr_FR",
98 		"french" : "fr_FR",
99 		"galego" : "gl_ES",
100 		"galician" : "gl_ES",
101 		"german" : "de_DE",
102 		"greek" : "el_GR",
103 		"hebrew" : "iw_IL",
104 		"hrvatski" : "hr_HR",
105 		"hungarian" : "hu_HU",
106 		"icelandic" : "is_IS",
107 		"italian" : "it_IT",
108 		"japanese" : "ja_JP",
109 		"korean" : "ko_KR",
110 		"lithuanian" : "lt_LT",
111 		"norwegian" : "no_NO",
112 		"nynorsk" : "nn_NO",
113 		"polish" : "pl_PL",
114 		"portuguese" : "pt_PT",
115 		"romanian" : "ro_RO",
116 		"russian" : "ru_RU",
117 		"slovak" : "sk_SK",
118 		"slovene" : "sl_SI",
119 		"slovenian" : "sl_SI",
120 		"spanish" : "es_ES",
121 		"swedish" : "sv_SE",
122 		"thai" : "th_TH",
123 		"turkish" : "tr_TR"
124 	];
125 
126 
127     /*
128      * Linux/Solaris language string to ISO639 string mapping table.
129      */
130     enum string[string] languageNames = [
131         "C" : "en",
132         "POSIX" : "en",
133         "cz" : "cs",
134         "he" : "iw",
135 
136         "hs" : "en",  // used on Linux : not clear what it stands for
137 
138         "id" : "in",
139         "sh" : "sr",  // sh is deprecated
140         "su" : "fi",
141 
142         "ua" : "en",  // used on Linux : not clear what it stands for
143 
144         "catalan" : "ca",
145         "croatian" : "hr",
146         "czech" : "cs",
147         "danish" : "da",
148         "dansk" : "da",
149         "deutsch" : "de",
150         "dutch" : "nl",
151         "finnish" : "fi",
152         "fran\xE7\x61is" : "fr",
153         "french" : "fr",
154         "german" : "de",
155         "greek" : "el",
156         "hebrew" : "he",
157         "hrvatski" : "hr",
158         "hungarian" : "hu",
159         "icelandic" : "is",
160         "italian" : "it",
161         "japanese" : "ja",
162         "norwegian" : "no",
163         "polish" : "pl",
164         "portuguese" : "pt",
165         "romanian" : "ro",
166         "russian" : "ru",
167         "slovak" : "sk",
168         "slovene" : "sl",
169         "slovenian" : "sl",
170         "spanish" : "es",
171         "swedish" : "sv",
172         "turkish" : "tr"
173     ];
174 
175     /*
176      * Linux/Solaris script string to Java script name mapping table.
177      */
178     enum string[string] scriptNames = [
179         "cyrillic" : "Cyrl",
180         "devanagari" : "Deva",
181         "iqtelif" : "Latn",
182         "latin" : "Latn",
183         "Arab" : "Arab",
184         "Cyrl" : "Cyrl",
185         "Deva" : "Deva",
186         "Ethi" : "Ethi",
187         "Hans" : "Hans",
188         "Hant" : "Hant",
189         "Latn" : "Latn",
190         "Sund" : "Sund",
191         "Syrc" : "Syrc",
192         "Tfng" : "Tfng"
193     ];
194 
195     /*
196      * Linux/Solaris country string to ISO3166 string mapping table.
197      */
198     enum string[string] countryNames = [
199         "RN" : "US", // used on Linux : not clear what it stands for
200         "YU" : "CS"  // YU has been removed from ISO 3166
201     ];   
202 
203  } else {
204 
205 	enum string[string] localeAliases = [
206 		"ar" : "ar_EG",
207 		"be" : "be_BY",
208 		"bg" : "bg_BG",
209 		"br" : "br_FR",
210 		"ca" : "ca_ES",
211 		"cs" : "cs_CZ",
212 		"cz" : "cs_CZ",
213 		"da" : "da_DK",
214 		"de" : "de_DE",
215 		"el" : "el_GR",
216 		"en" : "en_US",
217 		"eo" : "eo",    /* no country for Esperanto */
218 		"es" : "es_ES",
219 		"et" : "et_EE",
220 		"eu" : "eu_ES",
221 		"fi" : "fi_FI",
222 		"fr" : "fr_FR",
223 		"ga" : "ga_IE",
224 		"gl" : "gl_ES",
225 		"he" : "iw_IL",
226 		"hr" : "hr_HR",
227 		
228 		"hu" : "hu_HU",
229 		"id" : "in_ID",
230 		"in" : "in_ID",
231 		"is" : "is_IS",
232 		"it" : "it_IT",
233 		"iw" : "iw_IL",
234 		"ja" : "ja_JP",
235 		"kl" : "kl_GL",
236 		"ko" : "ko_KR",
237 		"lt" : "lt_LT",
238 		"lv" : "lv_LV",
239 		"mk" : "mk_MK",
240 		"nl" : "nl_NL",
241 		"no" : "no_NO",
242 		"pl" : "pl_PL",
243 		"pt" : "pt_PT",
244 		"ro" : "ro_RO",
245 		"ru" : "ru_RU",
246 		"se" : "se_NO",
247 		"sk" : "sk_SK",
248 		"sl" : "sl_SI",
249 		"sq" : "sq_AL",
250 		"sr" : "sr_CS",
251 		"su" : "fi_FI",
252 		"sv" : "sv_SE",
253 		"th" : "th_TH",
254 		"tr" : "tr_TR",
255 
256 		"uk" : "uk_UA",
257 		"vi" : "vi_VN",
258 		"wa" : "wa_BE",
259 		"zh" : "zh_CN",
260 
261 		"big5" : "zh_TW.Big5",
262 		"chinese" : "zh_CN",
263 		"iso_8859_1" : "en_US.ISO8859-1",
264 		"iso_8859_15" : "en_US.ISO8859-15",
265 		"japanese" : "ja_JP",
266 		"no_NY" : "no_NO@nynorsk",
267 		"sr_SP" : "sr_YU",
268 		"tchinese" : "zh_TW"
269  	]; 
270 
271 
272     /*
273      * Linux/Solaris language string to ISO639 string mapping table.
274      */
275     enum string[string] languageNames = [
276         "C" : "en",
277         "POSIX" : "en",
278         "cz" : "cs",
279         "he" : "iw",
280 
281         "id" : "in",
282         "sh" : "sr", // sh is deprecated
283         "su" : "fi",
284 
285         "chinese" : "zh",
286         "japanese" : "ja",
287         "korean" : "ko"
288     ];
289 
290     /*
291      * Linux/Solaris script string to Java script name mapping table.
292      */
293     enum string[string] scriptNames = [
294         "Arab" : "Arab",
295         "Cyrl" : "Cyrl",
296         "Deva" : "Deva",
297         "Ethi" : "Ethi",
298         "Hans" : "Hans",
299         "Hant" : "Hant",
300         "Latn" : "Latn",
301         "Sund" : "Sund",
302         "Syrc" : "Syrc",
303         "Tfng" : "Tfng"
304     ];
305 
306     /*
307      * Linux/Solaris country string to ISO3166 string mapping table.
308      */
309     enum string[string] countryNames = [
310         "YU" : "CS"  // YU has been removed from ISO 3166
311     ]; 
312  }
313 
/**
 * Locale categories accepted by Locale.set and Locale.query; every member
 * carries the value of the matching LC_* constant from core.stdc.locale.
 */
enum LocaleCategory
{
    ALL      = LC_ALL,
    COLLATE  = LC_COLLATE,
    CTYPE    = LC_CTYPE,
    MONETARY = LC_MONETARY,
    NUMERIC  = LC_NUMERIC,
    TIME     = LC_TIME,
    MESSAGES = LC_MESSAGES
}
323 
/*
 * Variant-name normalization table: maps a Linux/Solaris variant string
 * (the part after '@' in a locale name) to the variant code reported in
 * Locale.variant.
 */
enum string[string] variantNames = [
    "nynorsk" : "NY"
];
330 
331 // dfmt on
332 
/**
 * A locale split into language, country, encoding, variant and script,
 * obtained on Posix systems through setlocale() and nl_langinfo().
 *
 * See_Also:
 *     https://linux.die.net/man/3/setlocale
 */
class Locale {
    /// Normalized language code; "en" when the locale name carries none.
    string language;
    /// Normalized country code; empty when the locale name carries none.
    string country;
    /// Name of the character encoding.
    string encoding;
    /// Variant code looked up in `variantNames`; empty when not listed.
    string variant;
    /// Script code looked up in `scriptNames`; empty when not listed.
    string script;

    /// Renders all five components as a single "key=value, ..." line.
    override string toString() {
        return format("language=%s, country=%s, encoding=%s, variant=%s, script=%s",
            language, country, encoding, variant, script);
    }

    /**
     * Locale of the current user, taken from the MESSAGES category.
     *
     * Side effect: calls setlocale(LC_ALL, "") first, i.e. switches the
     * whole process to the locale configured in the environment.
     *
     * Returns: the parsed locale, or null when the category cannot be queried.
     */
    static Locale getUserDefault() {
        string info = set(LocaleCategory.ALL);
        version (HUNT_DEBUG_MORE) {
            tracef("Locale(ALL):%s ", info);
        }
        return query(LocaleCategory.MESSAGES);
    }

    /**
     * Locale of the system, taken from the CTYPE category.
     *
     * Side effect: calls setlocale(LC_ALL, "") first (see getUserDefault).
     *
     * Returns: the parsed locale, or null when the category cannot be queried.
     */
    static Locale getSystemDefault() {
        set(LocaleCategory.ALL);
        return query(LocaleCategory.CTYPE);
    }

    /// UI locale of the user; identical to getUserDefault() on Posix.
    static Locale getUserUI() {
        return getUserDefault();
    }

    /**
     * Sets the process locale for one category.
     *
     * Params:
     *     cat    = category to change
     *     locale = locale name; the empty string selects the locale
     *              configured in the environment
     *
     * Returns: the locale name reported by setlocale(), or an empty
     *          string when the request could not be honoured.
     */
    static string set(LocaleCategory cat, string locale="") {
        char* p = setlocale(cast(int)cat, locale.toStringz());
        // setlocale() returns storage owned by the C library that later
        // calls may overwrite, so hand out a private immutable copy.
        return fromStringz(p).idup;
    }

    /**
     * Reads the current locale of one category without changing it.
     *
     * Returns: the parsed locale, or null when setlocale() reports none.
     */
    static Locale query(LocaleCategory cat) {
        char* lc = setlocale(cast(int)cat, null);
        if(lc is null) {
            return null;
        }

        // Copy out of the C library's buffer before slicing it (see set()).
        string localInfo = fromStringz(lc).idup;
        version(HUNT_DEBUG_MORE) tracef("category=%s, locale: %s", cat, localInfo);
        return parse(localInfo);
    }

    /**
     * Index where the ".encoding@variant" tail of a locale name starts:
     * the first '.', otherwise the first '@', otherwise -1.
     */
    private static ptrdiff_t suffixStart(string name) {
        ptrdiff_t index = name.indexOf('.');
        if(index == -1) {
            index = name.indexOf('@');
        }
        return index;
    }

    /**
     * Parses a locale name of the form
     * <language>_<country>.<encoding>@<variant>, where every part except
     * the language is optional but must appear in that order.
     *
     * An empty name, "C" and "POSIX" are treated as "en_US". The part in
     * front of the tail is first resolved through `localeAliases`, then
     * language, country, script and variant are normalized through the
     * matching tables.
     *
     * Params:
     *     localInfo = locale name as reported by setlocale()
     *
     * Returns: a new Locale; never null.
     */
    static Locale parse(string localInfo) {
        string std_language, std_country, std_encoding, std_script, std_variant;
        if(localInfo.empty || localInfo == "C" || localInfo == "POSIX") {
            localInfo = "en_US";
        }

        // Split off the tail; it keeps its leading '.' or '@'.
        string name = localInfo;
        string encoding_variant;
        ptrdiff_t index = suffixStart(localInfo);
        if(index >= 0) {
            encoding_variant = localInfo[index .. $];
            name = localInfo[0 .. index];
        }

        // Resolve aliases (mostly "language only" names that get a default
        // country). An alias may bring its own tail, which then replaces
        // the one from the original name.
        if(!name.empty) {
            if(auto full = name in localeAliases) {
                name = *full;
                index = suffixStart(name);
                if(index >= 0) {
                    encoding_variant = name[index .. $];
                    name = name[0 .. index];
                }
            }
        }

        // <language>_<country>
        string language = name;
        string country;
        index = language.indexOf('_');
        if(index >= 0) {
            country = language[index+1 .. $];
            language = language[0..index];
        }

        // .<encoding>
        string encoding;
        string rest = encoding_variant;
        bool hasEncoding = false;
        index = rest.indexOf('.');
        if(index >= 0) {
            encoding = rest[index+1 .. $];
            rest = encoding;
            hasEncoding = true;
        }

        // @<variant> -- searched in whatever follows the encoding, or in
        // the whole tail when there is no encoding ("no_NO@nynorsk").
        string variant;
        index = rest.indexOf('@');
        if(index >= 0) {
            variant = rest[index+1 .. $];
            if(hasEncoding) {
                encoding = rest[0 .. index];
            }
        }

        /* Normalize the language name */
        if(language.empty) {
            std_language = "en";
        } else if(auto mapped = language in languageNames) {
            std_language = *mapped;
        } else {
            std_language = language;
        }

        /* Normalize the country name */
        if(!country.empty) {
            if(auto mapped = country in countryNames) {
                std_country = *mapped;
            } else {
                std_country = country;
            }
        }

        /* Normalize the script and variant name.  Only names listed in
         * the mapping tables are used; others are ignored.
         */
        if(auto mapped = variant in scriptNames)
            std_script = *mapped;

        if(auto mapped = variant in variantNames)
            std_variant = *mapped;

        /* Normalize the encoding name.  The encoding parsed from the
         * locale name is ignored except for "ISO8859-15"; in every other
         * case nl_langinfo(CODESET) is asked instead.
         */
        string p = encoding;
        if (p != "ISO8859-15") {
            // nl_langinfo() returns storage owned by the C library that
            // later calls may overwrite: keep a private copy.
            p = fromStringz(nl_langinfo(CODESET)).idup;
        }
        /* Convert the bare "646" used on Solaris to a proper IANA name */
        if (p == "646")
            p = "ISO646-US";

        /* No codeset reported: fall back to ISO8859-1. */
        if(p.empty)
            std_encoding = "ISO8859-1";
        else
            std_encoding = p;

        version(linux) {
            /*
             * Remap the encoding string to a different value for japanese
             * locales on linux so that customized converters are used instead
             * of the default converter for "EUC-JP".
             */
            if (p == "EUC-JP") std_encoding = "EUC-JP-LINUX";
        } else {
            if (p == "eucJP") {
                /* Solaris: vendor defined EUC-JP converter */
                std_encoding = "eucJP-open";
            } else if (p == "Big5" || p == "BIG5") {
                /* Solaris Big5 locales use an augmented Big5 converter */
                std_encoding = "Big5_Solaris";
            } else if (p == "Big5-HKSCS") {
                /* Solaris uses HKSCS2001 */
                std_encoding = "Big5-HKSCS-2001";
            }
        }

        version(OSX) {
            /*
             * On MacOS X, when the encoding is US-ASCII and none of
             * LANG/LC_ALL/LC_CTYPE gives an encoding hint, use UTF-8
             * instead. ASCII content is still read correctly, and so is
             * UTF-8 content beyond the ASCII range.
             */
            string lang = environment.get("LANG", "");
            string lcall = environment.get("LC_ALL", "");
            string lctype = environment.get("LC_CTYPE", "");
            if (p == "US-ASCII" && lang.empty &&
                lcall.empty && lctype.empty) {
                std_encoding = "UTF-8";
            }
        }

        Locale locale = new Locale();
        locale.language = std_language;
        locale.country = std_country;
        locale.encoding = std_encoding;
        locale.variant = std_variant;
        locale.script = std_script;

        return locale;
    }
}
584 
585 } else version(Windows) {
586 
587 import core.sys.windows.winbase;
588 import core.sys.windows.w32api;
589 import core.sys.windows.winnls;
590 import core.sys.windows.winnt;
591 import core.stdc.stdio;
592 import core.stdc.stdlib;
593 import core.stdc.string;
594 
595 // dfmt off
static if (_WIN32_WINNT >= 0x0600)
{
    // LCTYPE selectors declared only when _WIN32_WINNT >= 0x0600.
    enum : LCTYPE
    {
        LOCALE_SNAME                = 0x5c, // locale name (ie: en-us)
        LOCALE_SDURATION            = 0x5d, // time duration format, eg "hh:mm:ss"
        LOCALE_SSHORTESTDAYNAME1    = 0x60, // Shortest day name for Monday
        LOCALE_SSHORTESTDAYNAME2    = 0x61, // Shortest day name for Tuesday
        LOCALE_SSHORTESTDAYNAME3    = 0x62, // Shortest day name for Wednesday
        LOCALE_SSHORTESTDAYNAME4    = 0x63, // Shortest day name for Thursday
        LOCALE_SSHORTESTDAYNAME5    = 0x64, // Shortest day name for Friday
        LOCALE_SSHORTESTDAYNAME6    = 0x65, // Shortest day name for Saturday
        LOCALE_SSHORTESTDAYNAME7    = 0x66, // Shortest day name for Sunday
        LOCALE_SISO639LANGNAME2     = 0x67, // 3 character ISO abbreviated language name, eg "eng"
        LOCALE_SISO3166CTRYNAME2    = 0x68, // 3 character ISO country/region name, eg "USA"
        LOCALE_SNAN                 = 0x69, // Not a Number, eg "NaN"
        LOCALE_SPOSINFINITY         = 0x6a, // + Infinity, eg "infinity"
        LOCALE_SNEGINFINITY         = 0x6b, // - Infinity, eg "-infinity"
        LOCALE_SSCRIPTS             = 0x6c, // Typical scripts in the locale: ; delimited script codes, eg "Latn;"
        LOCALE_SPARENT              = 0x6d, // Fallback name for resources, eg "en" for "en-US"
        LOCALE_SCONSOLEFALLBACKNAME = 0x6e  // Fallback name for within the console for Unicode Only locales, eg "en" for bn-IN
    }
}
618 
619 // dfmt on
620 
/**
 * A locale split into language, country, encoding, variant and script,
 * obtained on Windows from an LCID through GetLocaleInfoA().
 *
 * See_Also:
 *     https://learn.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-getlocaleinfoa
 */
class Locale {
    /// ISO 639 language code; "en" when the system reports none.
    string language;
    /// ISO 3166 country code; empty when the system reports none.
    string country;
    /// Encoding name derived from the locale's default ANSI code page.
    string encoding;
    /// Variant code; only "NY" (Norwegian Nynorsk) is ever set here.
    string variant;
    /// Four-letter script code taken from the locale name, if it has one.
    string script;

    /// Renders all five components as a single "key=value, ..." line.
    override string toString() {
        return format("language=%s, country=%s, encoding=%s, variant=%s, script=%s",
            language, country, encoding, variant, script);
    }

    /// Locale for the LCID returned by GetUserDefaultLCID().
    static Locale getUserDefault() {
        return query(GetUserDefaultLCID());
    }

    /// Locale for the LCID returned by GetSystemDefaultLCID().
    static Locale getSystemDefault() {
        return query(GetSystemDefaultLCID());
    }

    /**
     * Locale for the user's UI language.
     *
     * The Windows UI language list only selects a language, not a region.
     * When the UI language and the user default locale share the same
     * primary language, the user default LCID is used so that its region
     * information is kept.
     */
    static Locale getUserUI() {
        LCID userDefaultLCID = GetUserDefaultLCID();
        LCID userDefaultUILang = GetUserDefaultUILanguage();
        if (PRIMARYLANGID(LANGIDFROMLCID(userDefaultLCID)) ==
            PRIMARYLANGID(LANGIDFROMLCID(userDefaultUILang))) {
            userDefaultUILang = userDefaultLCID;
        }
        return query(userDefaultUILang);
    }

    /**
     * Builds a Locale for the given LCID.
     *
     * Language and country fall back to "en"/"US" when no language can be
     * read. The Norwegian languages "nb" and "nn" are reported as "no"/"NO",
     * with variant "NY" for "nn".
     *
     * Returns: a new Locale; never null.
     */
    static Locale query(LCID lcid) {
        Locale locale = new Locale();

        enum PROPSIZE = 9;      // eight-letter + null terminator
        enum SNAMESIZE = 86;    // max number of chars for LOCALE_SNAME is 85

        // NOTE: D initialises char to 0xFF, not 0. Every buffer handed to
        // a C string function is therefore zero-filled explicitly so that
        // it is always terminated, even when the API call fails.

        static if (_WIN32_WINNT >= 0x0600) {
            /* script: an upper-case letter followed by lower-case letters
             * after the leading "lang-" part of the locale name */
            char[SNAMESIZE] tmp = '\0';
            char[PROPSIZE] scriptBuf = '\0';
            // %7[a-z] bounds the write to the 8 bytes behind scriptBuf[0].
            // sscanf may return EOF (negative) as well as 0 when nothing
            // was converted, hence "< 1".
            if (GetLocaleInfoA(lcid, LOCALE_SNAME, tmp.ptr, SNAMESIZE) == 0 ||
                sscanf(tmp.ptr, "%*[a-z\\-]%1[A-Z]%7[a-z]", scriptBuf.ptr, scriptBuf.ptr+1) < 1) {
                scriptBuf[0] = '\0';
            }

            if(strlen(scriptBuf.ptr) == 4) {
                locale.script = scriptBuf[0 .. 4].idup;
            }
        }

        /* country */
        char[PROPSIZE] countryBuf = '\0';
        if (GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, countryBuf.ptr, PROPSIZE) == 0 ) {
            static if (_WIN32_WINNT >= 0x0600) {
                // fall back to the 3 character name
                if(GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME2, countryBuf.ptr, PROPSIZE) == 0) {
                    countryBuf[0] = '\0';
                }
            } else {
                countryBuf[0] = '\0';
            }
        }

        /* language */
        char[PROPSIZE] languageBuf = '\0';
        if (GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, languageBuf.ptr, PROPSIZE) == 0) {
            static if (_WIN32_WINNT >= 0x0600) {
                // fall back to the 3 character name
                if(GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME2, languageBuf.ptr, PROPSIZE) == 0) {
                    languageBuf[0] = '\0';
                }
            } else {
                languageBuf[0] = '\0';
            }
        }

        size_t len = strlen(languageBuf.ptr);
        if(len == 0) {
            /* defaults to en_US */
            locale.language = "en";
            locale.country = "US";
        } else {
            locale.language = languageBuf[0 .. len].idup;
            len = strlen(countryBuf.ptr);
            if(len > 0)
                locale.country = countryBuf[0 .. len].idup;
        }

        /* variant: handling for Norwegian */
        if (locale.language == "nb") {
            locale.language = "no";
            locale.country = "NO";
        } else if (locale.language == "nn") {
            locale.language = "no";
            locale.country = "NO";
            locale.variant = "NY";
        }

        /* encoding */
        locale.encoding = getEncodingInternal(lcid);
        return locale;
    }

    /// Encoding name for a LANGID, using the default sort order.
    static string getEncodingFromLangID(LANGID langID) {
        return getEncodingInternal(MAKELCID(langID, SORT_DEFAULT));
    }

    /**
     * Derives an encoding name from the default ANSI code page of `lcid`.
     *
     * Code page 1252 is assumed when the code page cannot be read. The
     * name is built from the numeric code page, so it is well-formed on
     * that fallback path too.
     */
    private static string getEncodingInternal(LCID lcid) {
        char[16] ret = '\0';   // zero-filled: see the note in query()
        int codepage;
        if (GetLocaleInfoA(lcid, LOCALE_IDEFAULTANSICODEPAGE,
                        ret.ptr, 14) == 0) {
            codepage = 1252;
        } else {
            codepage = atoi(ret.ptr);
        }

        string encoding;
        switch (codepage) {
        case 0:
            encoding = "UTF-8";
            break;
        case 874:     /*  9:Thai     */
        case 932:     /* 10:Japanese */
        case 949:     /* 12:Korean Extended Wansung */
        case 950:     /* 13:Chinese (Taiwan, Hongkong, Macau) */
        case 1361:    /* 15:Korean Johab */
            encoding = format("MS%d", codepage);
            break;
        case 936:
            encoding = "GBK";
            break;
        case 54936:
            encoding = "GB18030";
            break;
        default:
            encoding = format("Cp%d", codepage);
            break;
        }

        if (encoding == "MS950") {
            // Traditional Chinese Windows should use MS950_HKSCS_XP as the
            // default encoding, if HKSCS patch has been installed.
            // "old" MS950 0xfa41 -> u+e001
            // "new" MS950 0xfa41 -> u+92db
            CHAR[2]  mbChar = [cast(char)0xfa, cast(char)0x41];
            WCHAR  unicodeChar;
            if (MultiByteToWideChar(CP_ACP, 0, mbChar.ptr, 2, &unicodeChar, 1) == 1 &&
                unicodeChar == 0x92db) {
                encoding = "MS950_HKSCS_XP";
            }
        } else if (encoding == "GBK" && IsValidCodePage(54936)) {
            // SimpChinese Windows should use GB18030 as the default
            // encoding, if gb18030 patch has been installed: code page
            // 54936 is available and SimSun18030.ttc exists under the
            // system fonts dir.
            char[MAX_PATH + 1] systemPath = '\0';
            enum string gb18030Font = "\\FONTS\\SimSun18030.ttc";

            // The length check guarantees room for the appended font
            // path including its terminator.
            if (GetWindowsDirectoryA(systemPath.ptr, MAX_PATH + 1) != 0 &&
                strlen(systemPath.ptr) + gb18030Font.length < MAX_PATH + 1) {
                strcat(systemPath.ptr, gb18030Font);
                FILE* f = fopen(systemPath.ptr, "r");
                if (f !is null) {
                    fclose(f);
                    encoding = "GB18030";
                }
            }
        }

        return encoding;
    }
}
827 
828 } else {
829     static assert(false, "Unsupported OS");
830 }