/*
 * Hunt - A refined core library for D programming language.
 *
 * Copyright (C) 2018-2019 HuntLabs
 *
 * Website: https://www.huntlabs.net/
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.text.CharacterData;


public import hunt.Char;
public import hunt.text.CharacterData.CharacterData;
public import hunt.text.CharacterData.CharacterDataLatin1;


/**
 * Static entry points for per-character property lookups.
 *
 * Every query is forwarded to a `CharacterData` table chosen by
 * `getCharacterData`; this class holds no state of its own.
 */
class CharacterHelper {

    /**
     * Selects the `CharacterData` table responsible for a code point.
     *
     * Params:
     *   ch = the code point to classify.
     *
     * Returns:
     *   `CharacterDataLatin1.instance` when no bit above the low eight
     *   is set in `ch`, i.e. for values 0x00 through 0xFF.
     *
     * Throws:
     *   `Exception` with the message "Unimplemented" for every other
     *   value, negative values included (the shift is unsigned).
     */
    static CharacterData getCharacterData(int ch) {
        // TODO: tables beyond Latin-1 have not been ported yet. The intended
        // dispatch keys on the plane number (ch >>> 16):
        //    0      -> CharacterData00
        //    1      -> CharacterData01
        //    2      -> CharacterData02
        //   14      -> CharacterData0E
        //   15, 16  -> CharacterDataPrivateUse (Private Use planes)
        //   others  -> CharacterDataUndefined
        const bool outsideLatin1 = (ch >>> 8) != 0;
        if (outsideLatin1) {
            throw new Exception("Unimplemented");
        }

        // Fast path: the code point fits in a single byte.
        return CharacterDataLatin1.instance;
    }


    /**
     * Returns the numeric value of the character `ch` in the given radix.
     *
     * This overload only widens `ch` to an `int` and forwards to
     * `digit(int, int)`. A D `char` is a single 8-bit code unit, so the
     * widened value always lies in the range that `getCharacterData`
     * serves from the Latin-1 table.
     *
     * Documented contract (carried over from the original comment; the
     * lookup itself lives in the selected `CharacterData` implementation,
     * which is not part of this module):
     *
     * $(UL
     *   $(LI `-1` is returned when the radix is outside
     *        `MIN_RADIX` ≤ `radix` ≤ `MAX_RADIX`, or when `ch` is not a
     *        valid digit in that radix.)
     *   $(LI A decimal digit whose value is less than `radix` yields that
     *        decimal value.)
     *   $(LI An uppercase Latin letter `'A'`..`'Z'` whose code is less than
     *        `radix + 'A' - 10` yields `ch - 'A' + 10`.)
     *   $(LI A lowercase Latin letter `'a'`..`'z'` whose code is less than
     *        `radix + 'a' - 10` yields `ch - 'a' + 10`.)
     * )
     *
     * Params:
     *   ch    = the character to be converted.
     *   radix = the radix.
     *
     * Returns:
     *   the numeric value represented by the character in the specified
     *   radix.
     */
    static int digit(char ch, int radix) {
        int codePoint = cast(int) ch;
        return digit(codePoint, radix);
    }


    /**
     * Returns the numeric value of the code point `codePoint` in the given
     * radix, as reported by the `CharacterData` table that
     * `getCharacterData` selects for it.
     *
     * The original contract additionally lists the fullwidth Latin letters
     * (U+FF21..U+FF3A and U+FF41..U+FF5A) as valid digits, mapped like
     * their ASCII counterparts. Those code points are above 0xFF, so at
     * present they cannot be looked up: `getCharacterData` throws for them.
     *
     * Params:
     *   codePoint = the code point to be converted.
     *   radix     = the radix.
     *
     * Returns:
     *   whatever the selected table's `digit(codePoint, radix)` returns.
     *
     * Throws:
     *   `Exception` ("Unimplemented") when `codePoint` is outside
     *   0x00..0xFF, propagated from `getCharacterData`.
     */
    static int digit(int codePoint, int radix) {
        auto table = getCharacterData(codePoint);
        return table.digit(codePoint, radix);
    }
}