/*
 * Hunt - A refined core library for D programming language.
 *
 * Copyright (C) 2018-2019 HuntLabs
 *
 * Website: https://www.huntlabs.net/
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.text.QuotedStringTokenizer;

import std.conv;
import std.ascii;
import std.string;

// import hunt.collection.StringBuilder;
import hunt.text.StringTokenizer;
import hunt.text.Common;
import hunt.util.StringBuilder;
import hunt.Exceptions;
import hunt.util.Appendable;
import hunt.util.Common;
import hunt.util.ConverterUtils;


/**
 * StringTokenizer with quoting support.
 *
 * This class is a copy of the java.util.StringTokenizer API and the behaviour
 * is the same, except that single and double quoted string values are
 * recognised. Delimiters within quotes are not considered delimiters. Quotes
 * can be escaped with '\'.
 *
 * @see java.util.StringTokenizer
 */
class QuotedStringTokenizer : StringTokenizer {
    /// Delimiters used when the caller passes a null delimiter string.
    private enum string __delim = "\t\n\r";

    /// Marker stored in the escape table for control characters that have no
    /// short (single letter) escape and must be written as \u00XX.
    private enum char NO_ESCAPE = cast(char) 0xFF;

    private string _string;
    private string _delim = __delim;
    private bool _returnQuotes = false;
    private bool _returnDelimiters = false;
    private StringBuilder _token;
    private bool _hasToken = false;
    private int _i = 0;          // index of the next character to scan
    private int _lastStart = 0;  // value of _i when the last scan started
    private bool _double = true;
    private bool _single = true;

    /**
     * Creates a tokenizer over a string.
     *
     * @param str
     *            the string to tokenize
     * @param delim
     *            the delimiter characters; the default set (tab, newline,
     *            carriage return) is used when null
     * @param returnDelimiters
     *            if true, each delimiter is returned as its own token
     * @param returnQuotes
     *            if true, quote characters (and the backslashes that escape
     *            inside quotes) are kept in the returned tokens
     * @throws Error if the delimiter set contains a single or double quote
     */
    this(string str, string delim, bool returnDelimiters, bool returnQuotes) {
        super("");
        _string = str;
        if (delim !is null)
            _delim = delim;
        _returnDelimiters = returnDelimiters;
        _returnQuotes = returnQuotes;

        if (_delim.indexOf('\'') >= 0 || _delim.indexOf('"') >= 0)
            throw new Error("Can't use quotes as delimiters: " ~ _delim);

        // Start with half the input length, capped at 512 for long inputs.
        _token = new StringBuilder(_string.length > 1024 ? 512 : _string.length / 2);
    }

    /// Same as the four argument constructor with returnQuotes set to false.
    this(string str, string delim, bool returnDelimiters) {
        this(str, delim, returnDelimiters, false);
    }

    /// Tokenizes with the given delimiters, returning neither delimiters nor quotes.
    this(string str, string delim) {
        this(str, delim, false, false);
    }

    /// Tokenizes with the default delimiters, returning neither delimiters nor quotes.
    this(string str) {
        this(str, null, false, false);
    }

    /**
     * Scans forward from the current position and buffers the next token.
     *
     * A token found by an earlier call and not yet consumed by nextToken()
     * is reported again without rescanning.
     *
     * @return true if a token is buffered and ready for nextToken()
     */
    override
    bool hasMoreTokens() {
        // Already found a token
        if (_hasToken)
            return true;

        _lastStart = _i;

        // 0 = start, 1 = inside a token, 2 = inside single quotes,
        // 3 = inside double quotes
        int state = 0;
        bool escape = false;
        while (_i < _string.length) {
            char c = _string.charAt(_i++);

            switch (state) {
                case 0: // Start
                    if (_delim.indexOf(c) >= 0) {
                        if (_returnDelimiters) {
                            _token.append(c);
                            return _hasToken = true;
                        }
                        // otherwise leading delimiters are skipped
                    } else if (c == '\'' && _single) {
                        if (_returnQuotes)
                            _token.append(c);
                        state = 2;
                    } else if (c == '\"' && _double) {
                        if (_returnQuotes)
                            _token.append(c);
                        state = 3;
                    } else {
                        _token.append(c);
                        _hasToken = true;
                        state = 1;
                    }
                    break;

                case 1: // Token
                    _hasToken = true;
                    if (_delim.indexOf(c) >= 0) {
                        // Step back so the delimiter is returned by the next scan.
                        if (_returnDelimiters)
                            _i--;
                        return _hasToken;
                    } else if (c == '\'' && _single) {
                        if (_returnQuotes)
                            _token.append(c);
                        state = 2;
                    } else if (c == '\"' && _double) {
                        if (_returnQuotes)
                            _token.append(c);
                        state = 3;
                    } else {
                        _token.append(c);
                    }
                    break;

                case 2: // Single Quote
                    _hasToken = true;
                    if (escape) {
                        escape = false;
                        _token.append(c);
                    } else if (c == '\'') {
                        if (_returnQuotes)
                            _token.append(c);
                        state = 1;
                    } else if (c == '\\') {
                        if (_returnQuotes)
                            _token.append(c);
                        escape = true;
                    } else {
                        _token.append(c);
                    }
                    break;

                case 3: // Double Quote
                    _hasToken = true;
                    if (escape) {
                        escape = false;
                        _token.append(c);
                    } else if (c == '\"') {
                        if (_returnQuotes)
                            _token.append(c);
                        state = 1;
                    } else if (c == '\\') {
                        if (_returnQuotes)
                            _token.append(c);
                        escape = true;
                    } else {
                        _token.append(c);
                    }
                    break;

                default:
                    break;
            }
        }

        return _hasToken;
    }

    /**
     * Returns the next token and clears the token buffer.
     *
     * @return the next token
     * @throws NoSuchElementException if no further token is available
     */
    override
    string nextToken() {
        if (!hasMoreTokens() || _token is null)
            throw new NoSuchElementException("");
        string t = _token.toString();
        _token.setLength(0);
        _hasToken = false;
        return t;
    }

    /**
     * Switches to a new delimiter set and returns the next token.
     *
     * The scan position is reset to where the most recent scan started and
     * any buffered token is discarded before scanning again.
     *
     * @param delim
     *            the new delimiter characters
     * @return the next token found with the new delimiters
     * @throws NoSuchElementException if no further token is available
     */
    override
    string nextToken(string delim) {
        _delim = delim;
        _i = _lastStart;
        _token.setLength(0);
        _hasToken = false;
        return nextToken();
    }


    /**
     * Not implemented.
     *
     * @return always -1
     */
    override
    int countTokens() {
        return -1;
    }

    /**
     * Quote a string. The string is quoted only if quoting is required due to
     * embedded delimiters, backslashes, quote characters, ASCII whitespace or
     * the empty string.
     *
     * @param s
     *            The string to quote.
     * @param delim
     *            the delimiter characters that force quoting
     * @return the quoted string, s itself when no quoting is needed, or null
     *         when s is null
     */
    static string quoteIfNeeded(string s, string delim) {
        if (s is null)
            return null;
        if (s.length == 0)
            return "\"\"";

        for (int i = 0; i < s.length; i++) {
            char c = s[i];
            if (c == '\\' || c == '"' || c == '\'' || std.ascii.isWhite(c) || delim.indexOf(c) >= 0) {
                StringBuilder b = new StringBuilder(s.length + 8);
                quote(b, s);
                return b.toString();
            }
        }

        return s;
    }

    /**
     * Quote a string. Unlike quoteIfNeeded, the string is always wrapped in
     * double quotes.
     *
     * @param s
     *            The string to quote.
     * @return the quoted string, or null when s is null
     */
    static string quote(string s) {
        if (s is null)
            return null;
        if (s.length == 0)
            return "\"\"";

        StringBuilder b = new StringBuilder(s.length + 8);
        quote(b, s);
        return b.toString();
    }

    /// Short escape letter for each character below 32, or NO_ESCAPE.
    private __gshared char[] escapes;

    shared static this() {
        escapes = new char[32];
        escapes[] = NO_ESCAPE;
        escapes['\b'] = 'b';
        escapes['\t'] = 't';
        escapes['\n'] = 'n';
        escapes['\f'] = 'f';
        escapes['\r'] = 'r';
    }

    /**
     * Quote a string into an Appendable. Only quotes and backslash are escaped.
     * Nothing is written when input is null.
     *
     * @param buffer
     *            The Appendable
     * @param input
     *            The string to quote.
     * @throws RuntimeException wrapping any IOException raised while appending
     */
    static void quoteOnly(Appendable buffer, string input) {
        if (input is null)
            return;

        try {
            buffer.append('"');
            for (int i = 0; i < input.length; ++i) {
                char c = input[i];
                if (c == '"' || c == '\\')
                    buffer.append('\\');
                buffer.append(c);
            }
            buffer.append('"');
        } catch (IOException x) {
            throw new RuntimeException(x);
        }
    }

    /**
     * Quote a string into an Appendable. The characters ", \, \n, \r, \t, \f
     * and \b are escaped with a backslash; every other character below 32 is
     * written as a \u00XX escape. Nothing is written when input is null.
     *
     * @param buffer
     *            The Appendable
     * @param input
     *            The string to quote.
     * @throws RuntimeException wrapping any IOException raised while appending
     */
    static void quote(Appendable buffer, string input) {
        if (input is null)
            return;

        try {
            buffer.append('"');
            for (int i = 0; i < input.length; ++i) {
                char c = input[i];
                if (c >= 32) {
                    if (c == '"' || c == '\\')
                        buffer.append('\\');
                    buffer.append(c);
                } else {
                    char escape = escapes[c];
                    // The table holds 8-bit chars, so the marker must be compared
                    // as the same 8-bit value it was stored with.
                    if (escape == NO_ESCAPE) {
                        // Unicode escape
                        buffer.append('\\').append('u').append('0').append('0');
                        if (c < 0x10)
                            buffer.append('0');
                        buffer.append(to!string(cast(int)c, 16));
                    } else {
                        buffer.append('\\').append(escape);
                    }
                }
            }
            buffer.append('"');
        } catch (IOException x) {
            throw new RuntimeException(x);
        }
    }

    /// Strict form of unquoteOnly(s, false).
    static string unquoteOnly(string s) {
        return unquoteOnly(s, false);
    }

    /**
     * Unquote a string, NOT converting escape sequences: the character after
     * a backslash is copied as is.
     *
     * @param s
     *            The string to unquote.
     * @param lenient
     *            if true, will leave in backslashes that aren't valid escapes
     * @return the unquoted string; s itself when it is shorter than two
     *         characters or not wrapped in a matching pair of quotes; null
     *         when s is null
     */
    static string unquoteOnly(string s, bool lenient) {
        if (s is null)
            return null;
        if (s.length < 2)
            return s;

        char first = s.charAt(0);
        char last = s.charAt(cast(int)s.length - 1);
        if (first != last || (first != '"' && first != '\''))
            return s;

        StringBuilder b = new StringBuilder(cast(int)s.length - 2);
        bool escape = false;
        for (int i = 1; i < s.length - 1; i++) {
            char c = s[i];

            if (escape) {
                escape = false;
                if (lenient && !isValidEscaping(c)) {
                    b.append('\\');
                }
                b.append(c);
            } else if (c == '\\') {
                escape = true;
            } else {
                b.append(c);
            }
        }

        return b.toString();
    }

    /// Strict form of unquote(s, false).
    static string unquote(string s) {
        return unquote(s, false);
    }

    /**
     * Unquote a string, converting the escapes \n, \r, \t, \f, \b, \\, \/,
     * \" and \uXXXX (four hex digits, appended as UTF-8).
     *
     * A \u that is not followed by four hex digits inside the quotes is
     * copied through unchanged, backslash included. A \uXXXX value in the
     * UTF-16 surrogate range is replaced by U+FFFD.
     *
     * @param s
     *            The string to unquote.
     * @param lenient
     *            true if unquoting should be lenient to escaped content,
     *            leaving backslashes in front of unknown escapes, false to
     *            drop them
     * @return the unquoted string; s itself when it is shorter than two
     *         characters or not wrapped in a matching pair of quotes; null
     *         when s is null
     */
    static string unquote(string s, bool lenient) {
        if (s is null)
            return null;
        if (s.length < 2)
            return s;

        char first = s.charAt(0);
        char last = s.charAt(cast(int)s.length - 1);
        if (first != last || (first != '"' && first != '\''))
            return s;

        // Index of the closing quote; content occupies [1, contentEnd).
        int contentEnd = cast(int)s.length - 1;

        StringBuilder b = new StringBuilder(cast(int)s.length - 2);
        bool escape = false;
        for (int i = 1; i < contentEnd; i++) {
            char c = s[i];

            if (escape) {
                escape = false;
                switch (c) {
                    case 'n':
                        b.append('\n');
                        break;
                    case 'r':
                        b.append('\r');
                        break;
                    case 't':
                        b.append('\t');
                        break;
                    case 'f':
                        b.append('\f');
                        break;
                    case 'b':
                        b.append('\b');
                        break;
                    case '\\':
                        b.append('\\');
                        break;
                    case '/':
                        b.append('/');
                        break;
                    case '"':
                        b.append('"');
                        break;
                    case 'u':
                        // i is the index of the 'u'; the hex digits are the
                        // four characters after it and must all lie before
                        // the closing quote.
                        bool wellFormed = i + 4 < contentEnd;
                        for (int k = 1; wellFormed && k <= 4; k++)
                            wellFormed = std.ascii.isHexDigit(s[i + k]);

                        if (wellFormed) {
                            int codePoint = to!int(s[i + 1 .. i + 5], 16);
                            i += 4; // skip the digits just consumed
                            appendCodePoint(b, codePoint);
                        } else {
                            // Malformed escape: keep the text as it was written.
                            b.append('\\');
                            b.append(c);
                        }
                        break;
                    default:
                        if (lenient && !isValidEscaping(c)) {
                            b.append('\\');
                        }
                        b.append(c);
                        break;
                }
            } else if (c == '\\') {
                escape = true;
            } else {
                b.append(c);
            }
        }

        return b.toString();
    }

    /**
     * Appends the UTF-8 code units of a code point to the builder. Values in
     * the UTF-16 surrogate range cannot be encoded on their own and are
     * replaced by U+FFFD.
     */
    private static void appendCodePoint(StringBuilder b, int codePoint) {
        import std.utf : encode;

        dchar scalar = cast(dchar) codePoint;
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            scalar = cast(dchar) 0xFFFD;

        char[4] utf8;
        size_t count = encode(utf8, scalar);
        foreach (char unit; utf8[0 .. count])
            b.append(unit);
    }

    /**
     * Check that char c (which is preceded by a backslash) is a valid escape
     * sequence.
     *
     * @param c
     *            the character following the backslash
     * @return true if c is one of n, r, t, f, b, \, /, " or u
     */
    private static bool isValidEscaping(char c) {
        return ((c == 'n') || (c == 'r') || (c == 't') || (c == 'f') || (c == 'b') || (c == '\\') || (c == '/')
                || (c == '"') || (c == 'u'));
    }

    /**
     * @return true if s is non-empty and both its first and its last
     *         character are a double quote
     */
    static bool isQuoted(string s) {
        return s !is null && s.length > 0 && s.charAt(0) == '"' && s.charAt(cast(int)s.length - 1) == '"';
    }

    /**
     * @return handle double quotes if true
     */
    bool getDouble() {
        return _double;
    }

    /**
     * @param d
     *            handle double quotes if true
     */
    void setDouble(bool d) {
        _double = d;
    }

    /**
     * @return handle single quotes if true
     */
    bool getSingle() {
        return _single;
    }

    /**
     * @param single
     *            handle single quotes if true
     */
    void setSingle(bool single) {
        _single = single;
    }
}