1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.text.CharacterData.CharacterDataLatin1;
13 
14 import hunt.Char;
15 import hunt.text.CharacterData.CharacterData;
16 
17 
/** The CharacterData class encapsulates the large tables found in
    Java's java.lang.Character. */
20 
21 class CharacterDataLatin1 : CharacterData {
22 
    /* The character properties are currently encoded into 32 bits in the following manner
       (fields are listed from the most significant bit down to the least significant bit):
24         1 bit   mirrored property
25         4 bits  directionality property
26         9 bits  signed offset used for converting case
27         1 bit   if 1, adding the signed offset converts the character to lowercase
28         1 bit   if 1, subtracting the signed offset converts the character to uppercase
29         1 bit   if 1, this character has a titlecase equivalent (possibly itself)
30         3 bits  0  may not be part of an identifier
31                 1  ignorable control; may continue a Unicode identifier or Java identifier
32                 2  may continue a Java identifier but not a Unicode identifier (unused)
33                 3  may continue a Unicode identifier or Java identifier
34                 4  is a Java whitespace character
35                 5  may start or continue a Java identifier;
36                    may continue but not start a Unicode identifier (underscores)
37                 6  may start or continue a Java identifier but not a Unicode identifier ($)
38                 7  may start or continue a Unicode identifier or Java identifier
39                 Thus:
40                    5, 6, 7 may start a Java identifier
41                    1, 2, 3, 5, 6, 7 may continue a Java identifier
42                    7 may start a Unicode identifier
43                    1, 3, 5, 7 may continue a Unicode identifier
44                    1 is ignorable within an identifier
45                    4 is Java whitespace
46         2 bits  0  this character has no numeric property
47                 1  adding the digit offset to the character code and then
48                    masking with 0x1F will produce the desired numeric value
49                 2  this character has a "strange" numeric value
50                 3  a Java supradecimal digit: adding the digit offset to the
51                    character code, then masking with 0x1F, then adding 10
52                    will produce the desired numeric value
53         5 bits  digit offset
54         5 bits  character type
55 
56         The encoding of character properties is subject to change at any time.
57      */
58 
59     override int getProperties(int ch) {
60         char offset = cast(char)ch;
61         int props = A[offset];
62         return props;
63     }
64 
65     int getPropertiesEx(int ch) {
66         char offset = cast(char)ch;
67         int props = B[offset];
68         return props;
69     }
70 
71     override bool isOtherLowercase(int ch) {
72         int props = getPropertiesEx(ch);
73         return (props & 0x0001) != 0;
74     }
75 
76     override bool isOtherUppercase(int ch) {
77         int props = getPropertiesEx(ch);
78         return (props & 0x0002) != 0;
79     }
80 
81     override bool isOtherAlphabetic(int ch) {
82         int props = getPropertiesEx(ch);
83         return (props & 0x0004) != 0;
84     }
85 
86     override bool isIdeographic(int ch) {
87         int props = getPropertiesEx(ch);
88         return (props & 0x0010) != 0;
89     }
90 
91     override int getType(int ch) {
92         int props = getProperties(ch);
93         return (props & 0x1F);
94     }
95 
96     override bool isJavaIdentifierStart(int ch) {
97         int props = getProperties(ch);
98         return ((props & 0x00007000) >= 0x00005000);
99     }
100 
101     override bool isJavaIdentifierPart(int ch) {
102         int props = getProperties(ch);
103         return ((props & 0x00003000) != 0);
104     }
105 
106     override bool isUnicodeIdentifierStart(int ch) {
107         int props = getProperties(ch);
108         return ((props & 0x00007000) == 0x00007000);
109     }
110 
111     override bool isUnicodeIdentifierPart(int ch) {
112         int props = getProperties(ch);
113         return ((props & 0x00001000) != 0);
114     }
115 
116     override bool isIdentifierIgnorable(int ch) {
117         int props = getProperties(ch);
118         return ((props & 0x00007000) == 0x00001000);
119     }
120 
121     override int toLowerCase(int ch) {
122         int mapChar = ch;
123         int val = getProperties(ch);
124 
125         if (((val & 0x00020000) != 0) && 
126                 ((val & 0x07FC0000) != 0x07FC0000)) { 
127             int offset = val << 5 >> (5+18);
128             mapChar = ch + offset;
129         }
130         return mapChar;
131     }
132 
133     override int toUpperCase(int ch) {
134         int mapChar = ch;
135         int val = getProperties(ch);
136 
137         if ((val & 0x00010000) != 0) {
138             if ((val & 0x07FC0000) != 0x07FC0000) {
139                 int offset = val  << 5 >> (5+18);
140                 mapChar =  ch - offset;
141             } else if (ch == 0x00B5) {
142                 mapChar = 0x039C;
143             }
144         }
145         return mapChar;
146     }
147 
148     override int toTitleCase(int ch) {
149         return toUpperCase(ch);
150     }
151 
152     override int digit(int ch, int radix) {
153         int value = -1;
154         if (radix >= Char.MIN_RADIX && radix <= Char.MAX_RADIX) {
155             int val = getProperties(ch);
156             int kind = val & 0x1F;
157             if (kind == Char.DECIMAL_DIGIT_NUMBER) {
158                 value = ch + ((val & 0x3E0) >> 5) & 0x1F;
159             }
160             else if ((val & 0xC00) == 0x00000C00) {
161                 // Java supradecimal digit
162                 value = (ch + ((val & 0x3E0) >> 5) & 0x1F) + 10;
163             }
164         }
165         return (value < radix) ? value : -1;
166     }
167 
168     override int getNumericValue(int ch) {
169         int val = getProperties(ch);
170         int retval = -1;
171 
172         switch (val & 0xC00) {
173             case (0x00000000):         // not numeric
174                 retval = -1;
175                 break;
176             case (0x00000400):              // simple numeric
177                 retval = ch + ((val & 0x3E0) >> 5) & 0x1F;
178                 break;
179             case (0x00000800)      :       // "strange" numeric
180                  retval = -2; 
181                  break;
182             case (0x00000C00):           // Java supradecimal
183                 retval = (ch + ((val & 0x3E0) >> 5) & 0x1F) + 10;
184                 break;
185             
186             default: 
187                 assert(false, "cannot occur");
188         }
189         return retval;
190     }
191 
192     override bool isWhitespace(int ch) {
193         int props = getProperties(ch);
194         return ((props & 0x00007000) == 0x00004000);
195     }
196 
197     override byte getDirectionality(int ch) {
198         int val = getProperties(ch);
199         byte directionality = cast(byte)((val & 0x78000000) >> 27);
200 
201         if (directionality == 0xF ) {
202             directionality = -1;
203         }
204         return directionality;
205     }
206 
207     override bool isMirrored(int ch) {
208         int props = getProperties(ch);
209         return ((props & 0x80000000) != 0);
210     }
211 
212     override int toUpperCaseEx(int ch) {
213         int mapChar = ch;
214         int val = getProperties(ch);
215 
216         if ((val & 0x00010000) != 0) {
217             if ((val & 0x07FC0000) != 0x07FC0000) {
218                 int offset = val  << 5 >> (5+18);
219                 mapChar =  ch - offset;
220             }
221             else {
222                 switch(ch) {
223                     // map overflow characters
224                     case 0x00B5 : mapChar = 0x039C; break;
225                     default       : mapChar = Char.ERROR; break;
226                 }
227             }
228         }
229         return mapChar;
230     }
231 
232     enum char[] sharpsMap = ['S', 'S'];
233 
234     override char[] toUpperCaseCharArray(int ch) {
235         char[] upperMap = [cast(char)ch];
236         if (ch == 0x00DF) {
237             upperMap = sharpsMap;
238         }
239         return upperMap;
240     }
241 
242     __gshared static CharacterDataLatin1 instance;
243 
244     shared static this() {
245         instance = new CharacterDataLatin1();
246     }
247 
248     private this() {}
249 
250   // The A table has 256 entries for a total of 1024 bytes.
251 
252   enum int[] A = [
253     0x4800100F,  //   0   Cc, ignorable
254     0x4800100F,  //   1   Cc, ignorable
255     0x4800100F,  //   2   Cc, ignorable
256     0x4800100F,  //   3   Cc, ignorable
257     0x4800100F,  //   4   Cc, ignorable
258     0x4800100F,  //   5   Cc, ignorable
259     0x4800100F,  //   6   Cc, ignorable
260     0x4800100F,  //   7   Cc, ignorable
261     0x4800100F,  //   8   Cc, ignorable
262     0x5800400F,  //   9   Cc, S, whitespace
263     0x5000400F,  //  10   Cc, B, whitespace
264     0x5800400F,  //  11   Cc, S, whitespace
265     0x6000400F,  //  12   Cc, WS, whitespace
266     0x5000400F,  //  13   Cc, B, whitespace
267     0x4800100F,  //  14   Cc, ignorable
268     0x4800100F,  //  15   Cc, ignorable
269     0x4800100F,  //  16   Cc, ignorable
270     0x4800100F,  //  17   Cc, ignorable
271     0x4800100F,  //  18   Cc, ignorable
272     0x4800100F,  //  19   Cc, ignorable
273     0x4800100F,  //  20   Cc, ignorable
274     0x4800100F,  //  21   Cc, ignorable
275     0x4800100F,  //  22   Cc, ignorable
276     0x4800100F,  //  23   Cc, ignorable
277     0x4800100F,  //  24   Cc, ignorable
278     0x4800100F,  //  25   Cc, ignorable
279     0x4800100F,  //  26   Cc, ignorable
280     0x4800100F,  //  27   Cc, ignorable
281     0x5000400F,  //  28   Cc, B, whitespace
282     0x5000400F,  //  29   Cc, B, whitespace
283     0x5000400F,  //  30   Cc, B, whitespace
284     0x5800400F,  //  31   Cc, S, whitespace
285     0x6000400C,  //  32   Zs, WS, whitespace
286     0x68000018,  //  33   Po, ON
287     0x68000018,  //  34   Po, ON
288     0x28000018,  //  35   Po, ET
289     0x2800601A,  //  36   Sc, ET, currency
290     0x28000018,  //  37   Po, ET
291     0x68000018,  //  38   Po, ON
292     0x68000018,  //  39   Po, ON
293     -0x17FFFFEB,  //  40   No, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
294     -0x17FFFFEA,  //  41   Nl, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
295     0x68000018,  //  42   Po, ON
296     0x20000019,  //  43   Sm, ES
297     0x38000018,  //  44   Po, CS
298     0x20000014,  //  45   Pd, ES
299     0x38000018,  //  46   Po, CS
300     0x38000018,  //  47   Po, CS
301     0x18003609,  //  48   Nd, EN, identifier part, decimal 16
302     0x18003609,  //  49   Nd, EN, identifier part, decimal 16
303     0x18003609,  //  50   Nd, EN, identifier part, decimal 16
304     0x18003609,  //  51   Nd, EN, identifier part, decimal 16
305     0x18003609,  //  52   Nd, EN, identifier part, decimal 16
306     0x18003609,  //  53   Nd, EN, identifier part, decimal 16
307     0x18003609,  //  54   Nd, EN, identifier part, decimal 16
308     0x18003609,  //  55   Nd, EN, identifier part, decimal 16
309     0x18003609,  //  56   Nd, EN, identifier part, decimal 16
310     0x18003609,  //  57   Nd, EN, identifier part, decimal 16
311     0x38000018,  //  58   Po, CS
312     0x68000018,  //  59   Po, ON
313     -0x17FFFFE7,  //  60   Me, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
314     0x68000019,  //  61   Sm, ON
315     -0x17FFFFE7,  //  62   Me, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
316     0x68000018,  //  63   Po, ON
317     0x68000018,  //  64   Po, ON
318     0x00827FE1,  //  65   Lu, L, hasLower (add 32), identifier start, supradecimal 31
319     0x00827FE1,  //  66   Lu, L, hasLower (add 32), identifier start, supradecimal 31
320     0x00827FE1,  //  67   Lu, L, hasLower (add 32), identifier start, supradecimal 31
321     0x00827FE1,  //  68   Lu, L, hasLower (add 32), identifier start, supradecimal 31
322     0x00827FE1,  //  69   Lu, L, hasLower (add 32), identifier start, supradecimal 31
323     0x00827FE1,  //  70   Lu, L, hasLower (add 32), identifier start, supradecimal 31
324     0x00827FE1,  //  71   Lu, L, hasLower (add 32), identifier start, supradecimal 31
325     0x00827FE1,  //  72   Lu, L, hasLower (add 32), identifier start, supradecimal 31
326     0x00827FE1,  //  73   Lu, L, hasLower (add 32), identifier start, supradecimal 31
327     0x00827FE1,  //  74   Lu, L, hasLower (add 32), identifier start, supradecimal 31
328     0x00827FE1,  //  75   Lu, L, hasLower (add 32), identifier start, supradecimal 31
329     0x00827FE1,  //  76   Lu, L, hasLower (add 32), identifier start, supradecimal 31
330     0x00827FE1,  //  77   Lu, L, hasLower (add 32), identifier start, supradecimal 31
331     0x00827FE1,  //  78   Lu, L, hasLower (add 32), identifier start, supradecimal 31
332     0x00827FE1,  //  79   Lu, L, hasLower (add 32), identifier start, supradecimal 31
333     0x00827FE1,  //  80   Lu, L, hasLower (add 32), identifier start, supradecimal 31
334     0x00827FE1,  //  81   Lu, L, hasLower (add 32), identifier start, supradecimal 31
335     0x00827FE1,  //  82   Lu, L, hasLower (add 32), identifier start, supradecimal 31
336     0x00827FE1,  //  83   Lu, L, hasLower (add 32), identifier start, supradecimal 31
337     0x00827FE1,  //  84   Lu, L, hasLower (add 32), identifier start, supradecimal 31
338     0x00827FE1,  //  85   Lu, L, hasLower (add 32), identifier start, supradecimal 31
339     0x00827FE1,  //  86   Lu, L, hasLower (add 32), identifier start, supradecimal 31
340     0x00827FE1,  //  87   Lu, L, hasLower (add 32), identifier start, supradecimal 31
341     0x00827FE1,  //  88   Lu, L, hasLower (add 32), identifier start, supradecimal 31
342     0x00827FE1,  //  89   Lu, L, hasLower (add 32), identifier start, supradecimal 31
343     0x00827FE1,  //  90   Lu, L, hasLower (add 32), identifier start, supradecimal 31
344     -0x17FFFFEB,  //  91   No, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
345     0x68000018,  //  92   Po, ON
346     -0x17FFFFEA,  //  93   Nl, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
347     0x6800001B,  //  94   Sk, ON
348     0x68005017,  //  95   Pc, ON, underscore
349     0x6800001B,  //  96   Sk, ON
350     0x00817FE2,  //  97   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
351     0x00817FE2,  //  98   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
352     0x00817FE2,  //  99   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
353     0x00817FE2,  // 100   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
354     0x00817FE2,  // 101   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
355     0x00817FE2,  // 102   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
356     0x00817FE2,  // 103   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
357     0x00817FE2,  // 104   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
358     0x00817FE2,  // 105   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
359     0x00817FE2,  // 106   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
360     0x00817FE2,  // 107   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
361     0x00817FE2,  // 108   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
362     0x00817FE2,  // 109   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
363     0x00817FE2,  // 110   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
364     0x00817FE2,  // 111   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
365     0x00817FE2,  // 112   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
366     0x00817FE2,  // 113   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
367     0x00817FE2,  // 114   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
368     0x00817FE2,  // 115   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
369     0x00817FE2,  // 116   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
370     0x00817FE2,  // 117   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
371     0x00817FE2,  // 118   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
372     0x00817FE2,  // 119   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
373     0x00817FE2,  // 120   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
374     0x00817FE2,  // 121   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
375     0x00817FE2,  // 122   Ll, L, hasUpper (subtract 32), identifier start, supradecimal 31
376     -0x17FFFFEB,  // 123   No, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
377     0x68000019,  // 124   Sm, ON
378     -0x17FFFFEA,  // 125   Nl, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
379     0x68000019,  // 126   Sm, ON
380     0x4800100F,  // 127   Cc, ignorable
381     0x4800100F,  // 128   Cc, ignorable
382     0x4800100F,  // 129   Cc, ignorable
383     0x4800100F,  // 130   Cc, ignorable
384     0x4800100F,  // 131   Cc, ignorable
385     0x4800100F,  // 132   Cc, ignorable
386     0x5000100F,  // 133   Cc, B, ignorable
387     0x4800100F,  // 134   Cc, ignorable
388     0x4800100F,  // 135   Cc, ignorable
389     0x4800100F,  // 136   Cc, ignorable
390     0x4800100F,  // 137   Cc, ignorable
391     0x4800100F,  // 138   Cc, ignorable
392     0x4800100F,  // 139   Cc, ignorable
393     0x4800100F,  // 140   Cc, ignorable
394     0x4800100F,  // 141   Cc, ignorable
395     0x4800100F,  // 142   Cc, ignorable
396     0x4800100F,  // 143   Cc, ignorable
397     0x4800100F,  // 144   Cc, ignorable
398     0x4800100F,  // 145   Cc, ignorable
399     0x4800100F,  // 146   Cc, ignorable
400     0x4800100F,  // 147   Cc, ignorable
401     0x4800100F,  // 148   Cc, ignorable
402     0x4800100F,  // 149   Cc, ignorable
403     0x4800100F,  // 150   Cc, ignorable
404     0x4800100F,  // 151   Cc, ignorable
405     0x4800100F,  // 152   Cc, ignorable
406     0x4800100F,  // 153   Cc, ignorable
407     0x4800100F,  // 154   Cc, ignorable
408     0x4800100F,  // 155   Cc, ignorable
409     0x4800100F,  // 156   Cc, ignorable
410     0x4800100F,  // 157   Cc, ignorable
411     0x4800100F,  // 158   Cc, ignorable
412     0x4800100F,  // 159   Cc, ignorable
413     0x3800000C,  // 160   Zs, CS
414     0x68000018,  // 161   Po, ON
415     0x2800601A,  // 162   Sc, ET, currency
416     0x2800601A,  // 163   Sc, ET, currency
417     0x2800601A,  // 164   Sc, ET, currency
418     0x2800601A,  // 165   Sc, ET, currency
419     0x6800001C,  // 166   So, ON
420     0x68000018,  // 167   Po, ON
421     0x6800001B,  // 168   Sk, ON
422     0x6800001C,  // 169   So, ON
423     -0xFFFF8FFB,  // 170   Sk, hasUpper (subtract 511), hasLower (add 511), hasTitle, supradecimal 31
424     -0x17FFFFE3,  // 171   Lt, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
425     0x68000019,  // 172   Sm, ON
426     0x48001010,  // 173   Cf, ignorable
427     0x6800001C,  // 174   So, ON
428     0x6800001B,  // 175   Sk, ON
429     0x2800001C,  // 176   So, ET
430     0x28000019,  // 177   Sm, ET
431     0x1800060B,  // 178   No, EN, decimal 16
432     0x1800060B,  // 179   No, EN, decimal 16
433     0x6800001B,  // 180   Sk, ON
434     0x07FD7002,  // 181   Ll, L, hasUpper (subtract 511), identifier start
435     0x68000018,  // 182   Po, ON
436     0x68000018,  // 183   Po, ON
437     0x6800001B,  // 184   Sk, ON
438     0x1800050B,  // 185   No, EN, decimal 8
439     -0xFFFF8FFB,  // 186   Sk, hasUpper (subtract 511), hasLower (add 511), hasTitle, supradecimal 31
440     -0x17FFFFE2,  // 187   Ll, hasUpper (subtract 511), hasLower (add 511), hasTitle, identifier start, supradecimal 31
441     0x6800080B,  // 188   No, ON, strange
442     0x6800080B,  // 189   No, ON, strange
443     0x6800080B,  // 190   No, ON, strange
444     0x68000018,  // 191   Po, ON
445     0x00827001,  // 192   Lu, L, hasLower (add 32), identifier start
446     0x00827001,  // 193   Lu, L, hasLower (add 32), identifier start
447     0x00827001,  // 194   Lu, L, hasLower (add 32), identifier start
448     0x00827001,  // 195   Lu, L, hasLower (add 32), identifier start
449     0x00827001,  // 196   Lu, L, hasLower (add 32), identifier start
450     0x00827001,  // 197   Lu, L, hasLower (add 32), identifier start
451     0x00827001,  // 198   Lu, L, hasLower (add 32), identifier start
452     0x00827001,  // 199   Lu, L, hasLower (add 32), identifier start
453     0x00827001,  // 200   Lu, L, hasLower (add 32), identifier start
454     0x00827001,  // 201   Lu, L, hasLower (add 32), identifier start
455     0x00827001,  // 202   Lu, L, hasLower (add 32), identifier start
456     0x00827001,  // 203   Lu, L, hasLower (add 32), identifier start
457     0x00827001,  // 204   Lu, L, hasLower (add 32), identifier start
458     0x00827001,  // 205   Lu, L, hasLower (add 32), identifier start
459     0x00827001,  // 206   Lu, L, hasLower (add 32), identifier start
460     0x00827001,  // 207   Lu, L, hasLower (add 32), identifier start
461     0x00827001,  // 208   Lu, L, hasLower (add 32), identifier start
462     0x00827001,  // 209   Lu, L, hasLower (add 32), identifier start
463     0x00827001,  // 210   Lu, L, hasLower (add 32), identifier start
464     0x00827001,  // 211   Lu, L, hasLower (add 32), identifier start
465     0x00827001,  // 212   Lu, L, hasLower (add 32), identifier start
466     0x00827001,  // 213   Lu, L, hasLower (add 32), identifier start
467     0x00827001,  // 214   Lu, L, hasLower (add 32), identifier start
468     0x68000019,  // 215   Sm, ON
469     0x00827001,  // 216   Lu, L, hasLower (add 32), identifier start
470     0x00827001,  // 217   Lu, L, hasLower (add 32), identifier start
471     0x00827001,  // 218   Lu, L, hasLower (add 32), identifier start
472     0x00827001,  // 219   Lu, L, hasLower (add 32), identifier start
473     0x00827001,  // 220   Lu, L, hasLower (add 32), identifier start
474     0x00827001,  // 221   Lu, L, hasLower (add 32), identifier start
475     0x00827001,  // 222   Lu, L, hasLower (add 32), identifier start
476     0x07FD7002,  // 223   Ll, L, hasUpper (subtract 511), identifier start
477     0x00817002,  // 224   Ll, L, hasUpper (subtract 32), identifier start
478     0x00817002,  // 225   Ll, L, hasUpper (subtract 32), identifier start
479     0x00817002,  // 226   Ll, L, hasUpper (subtract 32), identifier start
480     0x00817002,  // 227   Ll, L, hasUpper (subtract 32), identifier start
481     0x00817002,  // 228   Ll, L, hasUpper (subtract 32), identifier start
482     0x00817002,  // 229   Ll, L, hasUpper (subtract 32), identifier start
483     0x00817002,  // 230   Ll, L, hasUpper (subtract 32), identifier start
484     0x00817002,  // 231   Ll, L, hasUpper (subtract 32), identifier start
485     0x00817002,  // 232   Ll, L, hasUpper (subtract 32), identifier start
486     0x00817002,  // 233   Ll, L, hasUpper (subtract 32), identifier start
487     0x00817002,  // 234   Ll, L, hasUpper (subtract 32), identifier start
488     0x00817002,  // 235   Ll, L, hasUpper (subtract 32), identifier start
489     0x00817002,  // 236   Ll, L, hasUpper (subtract 32), identifier start
490     0x00817002,  // 237   Ll, L, hasUpper (subtract 32), identifier start
491     0x00817002,  // 238   Ll, L, hasUpper (subtract 32), identifier start
492     0x00817002,  // 239   Ll, L, hasUpper (subtract 32), identifier start
493     0x00817002,  // 240   Ll, L, hasUpper (subtract 32), identifier start
494     0x00817002,  // 241   Ll, L, hasUpper (subtract 32), identifier start
495     0x00817002,  // 242   Ll, L, hasUpper (subtract 32), identifier start
496     0x00817002,  // 243   Ll, L, hasUpper (subtract 32), identifier start
497     0x00817002,  // 244   Ll, L, hasUpper (subtract 32), identifier start
498     0x00817002,  // 245   Ll, L, hasUpper (subtract 32), identifier start
499     0x00817002,  // 246   Ll, L, hasUpper (subtract 32), identifier start
500     0x68000019,  // 247   Sm, ON
501     0x00817002,  // 248   Ll, L, hasUpper (subtract 32), identifier start
502     0x00817002,  // 249   Ll, L, hasUpper (subtract 32), identifier start
503     0x00817002,  // 250   Ll, L, hasUpper (subtract 32), identifier start
504     0x00817002,  // 251   Ll, L, hasUpper (subtract 32), identifier start
505     0x00817002,  // 252   Ll, L, hasUpper (subtract 32), identifier start
506     0x00817002,  // 253   Ll, L, hasUpper (subtract 32), identifier start
507     0x00817002,  // 254   Ll, L, hasUpper (subtract 32), identifier start
508     0x061D7002   // 255   Ll, L, hasUpper (subtract 391), identifier start
509   ];
510 
511   // The B table has 256 entries for a total of 512 bytes.
512 
513   enum char[] B = [
514     0x0000,  //   0   unassigned, L
515     0x0000,  //   1   unassigned, L
516     0x0000,  //   2   unassigned, L
517     0x0000,  //   3   unassigned, L
518     0x0000,  //   4   unassigned, L
519     0x0000,  //   5   unassigned, L
520     0x0000,  //   6   unassigned, L
521     0x0000,  //   7   unassigned, L
522     0x0000,  //   8   unassigned, L
523     0x0000,  //   9   unassigned, L
524     0x0000,  //  10   unassigned, L
525     0x0000,  //  11   unassigned, L
526     0x0000,  //  12   unassigned, L
527     0x0000,  //  13   unassigned, L
528     0x0000,  //  14   unassigned, L
529     0x0000,  //  15   unassigned, L
530     0x0000,  //  16   unassigned, L
531     0x0000,  //  17   unassigned, L
532     0x0000,  //  18   unassigned, L
533     0x0000,  //  19   unassigned, L
534     0x0000,  //  20   unassigned, L
535     0x0000,  //  21   unassigned, L
536     0x0000,  //  22   unassigned, L
537     0x0000,  //  23   unassigned, L
538     0x0000,  //  24   unassigned, L
539     0x0000,  //  25   unassigned, L
540     0x0000,  //  26   unassigned, L
541     0x0000,  //  27   unassigned, L
542     0x0000,  //  28   unassigned, L
543     0x0000,  //  29   unassigned, L
544     0x0000,  //  30   unassigned, L
545     0x0000,  //  31   unassigned, L
546     0x0000,  //  32   unassigned, L
547     0x0000,  //  33   unassigned, L
548     0x0000,  //  34   unassigned, L
549     0x0000,  //  35   unassigned, L
550     0x0000,  //  36   unassigned, L
551     0x0000,  //  37   unassigned, L
552     0x0000,  //  38   unassigned, L
553     0x0000,  //  39   unassigned, L
554     0x0000,  //  40   unassigned, L
555     0x0000,  //  41   unassigned, L
556     0x0000,  //  42   unassigned, L
557     0x0000,  //  43   unassigned, L
558     0x0000,  //  44   unassigned, L
559     0x0000,  //  45   unassigned, L
560     0x0000,  //  46   unassigned, L
561     0x0000,  //  47   unassigned, L
562     0x0000,  //  48   unassigned, L
563     0x0000,  //  49   unassigned, L
564     0x0000,  //  50   unassigned, L
565     0x0000,  //  51   unassigned, L
566     0x0000,  //  52   unassigned, L
567     0x0000,  //  53   unassigned, L
568     0x0000,  //  54   unassigned, L
569     0x0000,  //  55   unassigned, L
570     0x0000,  //  56   unassigned, L
571     0x0000,  //  57   unassigned, L
572     0x0000,  //  58   unassigned, L
573     0x0000,  //  59   unassigned, L
574     0x0000,  //  60   unassigned, L
575     0x0000,  //  61   unassigned, L
576     0x0000,  //  62   unassigned, L
577     0x0000,  //  63   unassigned, L
578     0x0000,  //  64   unassigned, L
579     0x0000,  //  65   unassigned, L
580     0x0000,  //  66   unassigned, L
581     0x0000,  //  67   unassigned, L
582     0x0000,  //  68   unassigned, L
583     0x0000,  //  69   unassigned, L
584     0x0000,  //  70   unassigned, L
585     0x0000,  //  71   unassigned, L
586     0x0000,  //  72   unassigned, L
587     0x0000,  //  73   unassigned, L
588     0x0000,  //  74   unassigned, L
589     0x0000,  //  75   unassigned, L
590     0x0000,  //  76   unassigned, L
591     0x0000,  //  77   unassigned, L
592     0x0000,  //  78   unassigned, L
593     0x0000,  //  79   unassigned, L
594     0x0000,  //  80   unassigned, L
595     0x0000,  //  81   unassigned, L
596     0x0000,  //  82   unassigned, L
597     0x0000,  //  83   unassigned, L
598     0x0000,  //  84   unassigned, L
599     0x0000,  //  85   unassigned, L
600     0x0000,  //  86   unassigned, L
601     0x0000,  //  87   unassigned, L
602     0x0000,  //  88   unassigned, L
603     0x0000,  //  89   unassigned, L
604     0x0000,  //  90   unassigned, L
605     0x0000,  //  91   unassigned, L
606     0x0000,  //  92   unassigned, L
607     0x0000,  //  93   unassigned, L
608     0x0000,  //  94   unassigned, L
609     0x0000,  //  95   unassigned, L
610     0x0000,  //  96   unassigned, L
611     0x0000,  //  97   unassigned, L
612     0x0000,  //  98   unassigned, L
613     0x0000,  //  99   unassigned, L
614     0x0000,  // 100   unassigned, L
615     0x0000,  // 101   unassigned, L
616     0x0000,  // 102   unassigned, L
617     0x0000,  // 103   unassigned, L
618     0x0000,  // 104   unassigned, L
619     0x0000,  // 105   unassigned, L
620     0x0000,  // 106   unassigned, L
621     0x0000,  // 107   unassigned, L
622     0x0000,  // 108   unassigned, L
623     0x0000,  // 109   unassigned, L
624     0x0000,  // 110   unassigned, L
625     0x0000,  // 111   unassigned, L
626     0x0000,  // 112   unassigned, L
627     0x0000,  // 113   unassigned, L
628     0x0000,  // 114   unassigned, L
629     0x0000,  // 115   unassigned, L
630     0x0000,  // 116   unassigned, L
631     0x0000,  // 117   unassigned, L
632     0x0000,  // 118   unassigned, L
633     0x0000,  // 119   unassigned, L
634     0x0000,  // 120   unassigned, L
635     0x0000,  // 121   unassigned, L
636     0x0000,  // 122   unassigned, L
637     0x0000,  // 123   unassigned, L
638     0x0000,  // 124   unassigned, L
639     0x0000,  // 125   unassigned, L
640     0x0000,  // 126   unassigned, L
641     0x0000,  // 127   unassigned, L
642     0x0000,  // 128   unassigned, L
643     0x0000,  // 129   unassigned, L
644     0x0000,  // 130   unassigned, L
645     0x0000,  // 131   unassigned, L
646     0x0000,  // 132   unassigned, L
647     0x0000,  // 133   unassigned, L
648     0x0000,  // 134   unassigned, L
649     0x0000,  // 135   unassigned, L
650     0x0000,  // 136   unassigned, L
651     0x0000,  // 137   unassigned, L
652     0x0000,  // 138   unassigned, L
653     0x0000,  // 139   unassigned, L
654     0x0000,  // 140   unassigned, L
655     0x0000,  // 141   unassigned, L
656     0x0000,  // 142   unassigned, L
657     0x0000,  // 143   unassigned, L
658     0x0000,  // 144   unassigned, L
659     0x0000,  // 145   unassigned, L
660     0x0000,  // 146   unassigned, L
661     0x0000,  // 147   unassigned, L
662     0x0000,  // 148   unassigned, L
663     0x0000,  // 149   unassigned, L
664     0x0000,  // 150   unassigned, L
665     0x0000,  // 151   unassigned, L
666     0x0000,  // 152   unassigned, L
667     0x0000,  // 153   unassigned, L
668     0x0000,  // 154   unassigned, L
669     0x0000,  // 155   unassigned, L
670     0x0000,  // 156   unassigned, L
671     0x0000,  // 157   unassigned, L
672     0x0000,  // 158   unassigned, L
673     0x0000,  // 159   unassigned, L
674     0x0000,  // 160   unassigned, L
675     0x0000,  // 161   unassigned, L
676     0x0000,  // 162   unassigned, L
677     0x0000,  // 163   unassigned, L
678     0x0000,  // 164   unassigned, L
679     0x0000,  // 165   unassigned, L
680     0x0000,  // 166   unassigned, L
681     0x0000,  // 167   unassigned, L
682     0x0000,  // 168   unassigned, L
683     0x0000,  // 169   unassigned, L
684     0x0001,  // 170   Lu, L
685     0x0000,  // 171   unassigned, L
686     0x0000,  // 172   unassigned, L
687     0x0000,  // 173   unassigned, L
688     0x0000,  // 174   unassigned, L
689     0x0000,  // 175   unassigned, L
690     0x0000,  // 176   unassigned, L
691     0x0000,  // 177   unassigned, L
692     0x0000,  // 178   unassigned, L
693     0x0000,  // 179   unassigned, L
694     0x0000,  // 180   unassigned, L
695     0x0000,  // 181   unassigned, L
696     0x0000,  // 182   unassigned, L
697     0x0000,  // 183   unassigned, L
698     0x0000,  // 184   unassigned, L
699     0x0000,  // 185   unassigned, L
700     0x0001,  // 186   Lu, L
701     0x0000,  // 187   unassigned, L
702     0x0000,  // 188   unassigned, L
703     0x0000,  // 189   unassigned, L
704     0x0000,  // 190   unassigned, L
705     0x0000,  // 191   unassigned, L
706     0x0000,  // 192   unassigned, L
707     0x0000,  // 193   unassigned, L
708     0x0000,  // 194   unassigned, L
709     0x0000,  // 195   unassigned, L
710     0x0000,  // 196   unassigned, L
711     0x0000,  // 197   unassigned, L
712     0x0000,  // 198   unassigned, L
713     0x0000,  // 199   unassigned, L
714     0x0000,  // 200   unassigned, L
715     0x0000,  // 201   unassigned, L
716     0x0000,  // 202   unassigned, L
717     0x0000,  // 203   unassigned, L
718     0x0000,  // 204   unassigned, L
719     0x0000,  // 205   unassigned, L
720     0x0000,  // 206   unassigned, L
721     0x0000,  // 207   unassigned, L
722     0x0000,  // 208   unassigned, L
723     0x0000,  // 209   unassigned, L
724     0x0000,  // 210   unassigned, L
725     0x0000,  // 211   unassigned, L
726     0x0000,  // 212   unassigned, L
727     0x0000,  // 213   unassigned, L
728     0x0000,  // 214   unassigned, L
729     0x0000,  // 215   unassigned, L
730     0x0000,  // 216   unassigned, L
731     0x0000,  // 217   unassigned, L
732     0x0000,  // 218   unassigned, L
733     0x0000,  // 219   unassigned, L
734     0x0000,  // 220   unassigned, L
735     0x0000,  // 221   unassigned, L
736     0x0000,  // 222   unassigned, L
737     0x0000,  // 223   unassigned, L
738     0x0000,  // 224   unassigned, L
739     0x0000,  // 225   unassigned, L
740     0x0000,  // 226   unassigned, L
741     0x0000,  // 227   unassigned, L
742     0x0000,  // 228   unassigned, L
743     0x0000,  // 229   unassigned, L
744     0x0000,  // 230   unassigned, L
745     0x0000,  // 231   unassigned, L
746     0x0000,  // 232   unassigned, L
747     0x0000,  // 233   unassigned, L
748     0x0000,  // 234   unassigned, L
749     0x0000,  // 235   unassigned, L
750     0x0000,  // 236   unassigned, L
751     0x0000,  // 237   unassigned, L
752     0x0000,  // 238   unassigned, L
753     0x0000,  // 239   unassigned, L
754     0x0000,  // 240   unassigned, L
755     0x0000,  // 241   unassigned, L
756     0x0000,  // 242   unassigned, L
757     0x0000,  // 243   unassigned, L
758     0x0000,  // 244   unassigned, L
759     0x0000,  // 245   unassigned, L
760     0x0000,  // 246   unassigned, L
761     0x0000,  // 247   unassigned, L
762     0x0000,  // 248   unassigned, L
763     0x0000,  // 249   unassigned, L
764     0x0000,  // 250   unassigned, L
765     0x0000,  // 251   unassigned, L
766     0x0000,  // 252   unassigned, L
767     0x0000,  // 253   unassigned, L
768     0x0000,  // 254   unassigned, L
769     0x0000   // 255   unassigned, L
770   ];
771 
772   // In all, the character property tables require 1024 bytes.
773         
774 }