/*
 * Hunt - A refined core library for D programming language.
 *
 * Copyright (C) 2018-2019 HuntLabs
 *
 * Website: https://www.huntlabs.net/
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.text.QuoteUtil;

import hunt.util.StringBuilder;
import hunt.Exceptions;

import std.ascii;
import std.conv;
import std.range;
import std.string;


/**
 * Provide some consistent Http header value and Extension configuration parameter quoting support.
 * <p>
 * While QuotedStringTokenizer exists in the utils, and works great with http header values, using it in websocket-api is undesired.
 * <ul>
 * <li>Using QuotedStringTokenizer would introduce a dependency to the utils that would need to be exposed via the WebAppContext classloader</li>
 * <li>ABNF defined extension parameter parsing requirements of RFC-6455 (WebSocket) ABNF, is slightly different than the ABNF parsing defined in RFC-2616
 * (HTTP/1.1).</li>
 * <li>Future HTTPbis ABNF changes for parsing will impact QuotedStringTokenizer</li>
 * </ul>
 * It was decided to keep this implementation separate for the above reasons.
 */
class QuoteUtil {

    /**
     * ABNF from RFC 2616, RFC 822, and RFC 6455 specified characters requiring quoting.
     */
    enum string ABNF_REQUIRED_QUOTING = "\"'\\\n\r\t\f\b%+ ;=";

    /// Marker stored in `escapes` for control characters that have no short escape form.
    private enum char UNICODE_TAG = cast(char)0xFF;

    /// Lookup table indexed by control character (0 .. 31) giving its short escape letter.
    private __gshared char[] escapes;

    shared static this() {
        escapes = new char[32];
        // By default every control character is written as a \u00XX escape ...
        escapes[] = UNICODE_TAG;
        // ... except those that have a dedicated single-letter escape.
        escapes['\b'] = 'b';
        escapes['\t'] = 't';
        escapes['\n'] = 'n';
        escapes['\f'] = 'f';
        escapes['\r'] = 'r';
    }

    /**
     * Convert a single ASCII hexadecimal digit to its numeric value.
     *
     * @param b the ASCII code of the digit ('0'-'9', 'a'-'f' or 'A'-'F')
     * @return the value of the digit, in the range 0 .. 15
     * @throws IllegalArgumentException if b is not a hexadecimal digit
     */
    private static int dehex(byte b) {
        if ((b >= '0') && (b <= '9')) {
            return b - '0';
        }
        if ((b >= 'a') && (b <= 'f')) {
            return (b - 'a') + 10;
        }
        if ((b >= 'A') && (b <= 'F')) {
            return (b - 'A') + 10;
        }
        throw new IllegalArgumentException("!hex:" ~ to!string(0xff & b, 16));
    }

    /**
     * Decode the four hexadecimal digits of a <code>\uXXXX</code> escape into UTF-8.
     *
     * @param str the string containing the escape
     * @param start the index of the first hexadecimal digit (the character right after the 'u')
     * @return the UTF-8 encoding of the escaped code point
     * @throws IllegalArgumentException if fewer than four characters are available, if one of them is not a
     *         hexadecimal digit, or if the value is not a valid code point on its own (a UTF-16 surrogate half)
     */
    private static string decodeUnicodeEscape(string str, size_t start) {
        import std.utf : encode, isValidDchar;

        if (start > str.length || str.length - start < 4) {
            throw new IllegalArgumentException("Incomplete \\u escape in: " ~ str);
        }

        // Each digit contributes four bits, most significant digit first.
        uint codePoint = 0;
        foreach (char digit; str[start .. start + 4]) {
            codePoint = (codePoint << 4) | cast(uint) dehex(cast(byte) digit);
        }

        dchar decoded = cast(dchar) codePoint;
        if (!isValidDchar(decoded)) {
            throw new IllegalArgumentException("Invalid unicode escape \\u" ~ str[start .. start + 4]);
        }

        // A D string is UTF-8, so the code point may need more than one code unit.
        char[4] utf8;
        size_t count = encode(utf8, decoded);
        return utf8[0 .. count].idup;
    }

    /**
     * Remove quotes from a string, only if the input string starts with and ends with the same quote character.
     * <p>
     * Strings shorter than two characters (including null and empty strings) cannot carry both an opening and a
     * closing quote and are returned unchanged.
     *
     * @param str the string to remove surrounding quotes from
     * @return the de-quoted string
     */
    static string dequote(string str) {
        if (str.length < 2) {
            return str;
        }
        char start = str[0];
        bool isQuote = (start == '\'') || (start == '\"');
        if (isQuote && str[$ - 1] == start) {
            return str[1 .. $ - 1];
        }
        return str;
    }

    /**
     * Append str to buf, backslash-escaping double quotes, backslashes and control characters.
     * <p>
     * Control characters (code units below 32) are written with their short escape (<code>\b \t \n \f \r</code>) when
     * one exists and as <code>\u00XX</code> otherwise. All other code units are copied unchanged.
     *
     * @param buf the StringBuilder to append to
     * @param str the string to escape
     */
    static void escape(StringBuilder buf, string str) {
        foreach (char c ; str) {
            if (c >= 32) {
                // ordinary character; only quote and backslash need a leading backslash
                if ((c == '"') || (c == '\\')) {
                    buf.append('\\');
                }
                buf.append(c);
                continue;
            }

            // control character, requiring escaping
            char escaped = escapes[c];
            if (escaped == UNICODE_TAG) {
                // no short form: emit \u00XX, zero padded to four hex digits
                buf.append("\\u00");
                if (c < 0x10) {
                    buf.append('0');
                }
                buf.append(to!string(cast(int)c, 16)); // hex
            } else {
                // short form, e.g. \n
                buf.append('\\').append(escaped);
            }
        }
    }

    /**
     * Simple quote of a string, escaping where needed.
     *
     * @param buf the StringBuilder to append to
     * @param str the string to quote
     */
    static void quote(StringBuilder buf, string str) {
        buf.append('"');
        escape(buf, str);
        buf.append('"');
    }

    /**
     * Append into buf the provided string, adding quotes if needed.
     * <p>
     * Quoting is determined if any of the characters in the <code>delim</code> are found in the input <code>str</code>.
     * Nothing is appended when <code>str</code> is null or empty.
     *
     * @param buf the buffer to append to
     * @param str the string to possibly quote
     * @param delim the delimiter characters that will trigger automatic quoting
     */
    static void quoteIfNeeded(StringBuilder buf, string str, string delim) {
        // a null string has length 0 as well
        if (str.length == 0) {
            return;
        }

        // check for delimiters in input string
        foreach (char ch ; str) {
            if (delim.indexOf(ch) >= 0) {
                // found a delimiter. we need to quote the whole string.
                quote(buf, str);
                return;
            }
        }

        // no special delimiters used, no quote needed.
        buf.append(str);
    }

    /**
     * Create an iterator of the input string, breaking apart the string at the provided delimiters, removing quotes and trimming the parts of the string as
     * needed.
     *
     * @param str the input string to split apart
     * @param delims the delimiter characters to split the string on
     * @return the iterator of the parts of the string, trimmed, with quotes around the string part removed, and unescaped
     */
    static InputRange!string splitAt(string str, string delims) {
        return new DeQuotingStringIterator(str.strip(), delims);
    }

    /**
     * Reverse the backslash escaping applied by <code>escape</code>.
     * <p>
     * Recognised escapes are <code>\n \r \t \f \b \\ \/ \"</code> and <code>\uXXXX</code> (four hexadecimal digits,
     * appended as UTF-8). For any other escaped character the backslash is dropped and the character is kept. A
     * backslash that is the very last character of the input is dropped.
     *
     * @param str the string to unescape, may be null
     * @return the unescaped string, or null if str is null
     * @throws IllegalArgumentException if a <code>\u</code> escape is truncated or malformed
     */
    static string unescape(string str) {
        if (str is null) {
            // nothing there
            return null;
        }

        size_t len = str.length;
        if (len <= 1) {
            // impossible to be escaped
            return str;
        }

        // The result can never be longer than the input.
        StringBuilder ret = new StringBuilder(len);
        bool escaped = false;
        for (size_t i = 0; i < len; i++) {
            char c = str[i];
            if (escaped) {
                escaped = false;
                switch (c) {
                    case 'n':
                        ret.append('\n');
                        break;
                    case 'r':
                        ret.append('\r');
                        break;
                    case 't':
                        ret.append('\t');
                        break;
                    case 'f':
                        ret.append('\f');
                        break;
                    case 'b':
                        ret.append('\b');
                        break;
                    case '\\':
                        ret.append('\\');
                        break;
                    case '/':
                        ret.append('/');
                        break;
                    case '"':
                        ret.append('"');
                        break;
                    case 'u':
                        // i is the index of the 'u'; the four hex digits follow it.
                        ret.append(decodeUnicodeEscape(str, i + 1));
                        // skip the digits; the loop increment then moves past the last one
                        i += 4;
                        break;
                    default:
                        ret.append(c);
                        break;
                }
            } else if (c == '\\') {
                escaped = true;
            } else {
                ret.append(c);
            }
        }
        return ret.toString();
    }

    // static string join(Object[] objs, string delim) {
    //     if (objs is null) {
    //         return "";
    //     }
    //     StringBuilder ret = new StringBuilder();
    //     int len = objs.length;
    //     for (int i = 0; i < len; i++) {
    //         if (i > 0) {
    //             ret.append(delim);
    //         }
    //         if (objs[i] instanceof string) {
    //             ret.append('"').append(objs[i]).append('"');
    //         } else {
    //             ret.append(objs[i]);
    //         }
    //     }
    //     return ret.toString();
    // }

    // static string join(Collection<?> objs, string delim) {
    //     if (objs is null) {
    //         return "";
    //     }
    //     StringBuilder ret = new StringBuilder();
    //     bool needDelim = false;
    //     foreach (Object obj ; objs) {
    //         if (needDelim) {
    //             ret.append(delim);
    //         }
    //         if (obj instanceof string) {
    //             ret.append('"').append(obj).append('"');
    //         } else {
    //             ret.append(obj);
    //         }
    //         needDelim = true;
    //     }
    //     return ret.toString();
    // }
}

/**
 * Input range that splits a string at delimiter characters while honouring single and double quoted sections.
 * <p>
 * Delimiters inside a quoted section do not split the token. A backslash inside a quoted section is dropped and the
 * character following it is taken literally. Each token returned by <code>front</code> is stripped of surrounding
 * whitespace and of one pair of matching surrounding quotes.
 */
private class DeQuotingStringIterator : InputRange!string {
    private enum State {
        START,
        TOKEN,
        QUOTE_SINGLE,
        QUOTE_DOUBLE
    }

    private string input;
    private string delims;
    /// Characters of the token currently exposed through front().
    private StringBuilder token;
    /// True while `token` holds a token that has not been popped yet.
    private bool hasToken = false;
    /// Index of the next unread character of `input`.
    private size_t i = 0;

    /**
     * @param input the string to split
     * @param delims the characters that separate tokens
     */
    this(string input, string delims) {
        this.input = input;
        this.delims = delims;
        size_t len = input.length;
        token = new StringBuilder(len > 1024 ? 512 : len / 2);

        // prime the range with the first token
        popFront();
    }

    /**
     * Append c to the current token, ignoring whitespace until the token has received its first character.
     */
    private void appendToken(char c) {
        if (!hasToken) {
            if (isWhite(c)) {
                return; // skip whitespace at start of token.
            }
            hasToken = true;
        }
        token.append(c);
    }

    /**
     * Handle one character read while inside a quoted section delimited by `quote`.
     *
     * @param c the character just read
     * @param quote the quote character that closes the current section
     * @param state set to State.TOKEN when the closing quote is seen
     * @param escaped whether the previous character was an unescaped backslash; updated in place
     */
    private void consumeQuoted(char c, char quote, ref State state, ref bool escaped) {
        if (escaped) {
            // previous character was a backslash: take this one literally
            escaped = false;
            appendToken(c);
        } else if (c == quote) {
            appendToken(c);
            state = State.TOKEN;
        } else if (c == '\\') {
            // the backslash itself is not part of the token
            escaped = true;
        } else {
            appendToken(c);
        }
    }

    /// True when no further token is available.
    bool empty() {
        return !hasToken;
    }

    /**
     * @return the current token, stripped and with one pair of matching surrounding quotes removed
     * @throws NoSuchElementException if the range is empty
     */
    string front() @property {
        if (!hasToken) {
            throw new NoSuchElementException();
        }
        string ret = token.toString();
        return QuoteUtil.dequote(ret.strip());
    }

    /**
     * Discard the current token and scan the input for the next one.
     * <p>
     * Scanning stops at the first delimiter found outside of quotes, or at the end of the input.
     */
    void popFront() {
        token.setLength(0);
        hasToken = false;

        State state = State.START;
        bool escaped = false;
        size_t inputLen = input.length;

        while (i < inputLen) {
            char c = input[i++];

            final switch (state) {
                case State.START:
                    if (c == '\'') {
                        state = State.QUOTE_SINGLE;
                    } else if (c == '\"') {
                        state = State.QUOTE_DOUBLE;
                    } else {
                        state = State.TOKEN;
                    }
                    appendToken(c);
                    break;

                case State.TOKEN:
                    if (delims.indexOf(c) >= 0) {
                        // delimiter outside of quotes: the token is complete
                        return;
                    }
                    if (c == '\'') {
                        state = State.QUOTE_SINGLE;
                    } else if (c == '\"') {
                        state = State.QUOTE_DOUBLE;
                    }
                    appendToken(c);
                    break;

                case State.QUOTE_SINGLE:
                    consumeQuoted(c, '\'', state, escaped);
                    break;

                case State.QUOTE_DOUBLE:
                    consumeQuoted(c, '\"', state, escaped);
                    break;
            }
        }
    }


    /**
     * Call dg with every remaining token, stopping early when dg returns a non-zero value.
     *
     * @throws NullPointerException if dg is null
     */
    int opApply(scope int delegate(string) dg) {
        if(dg is null)
            throw new NullPointerException("");
        int result = 0;
        while(hasToken && result == 0) {
            result = dg(front());
            popFront();
        }
        return result;
    }

    /**
     * Call dg with a zero-based counter and every remaining token, stopping early when dg returns a non-zero value.
     *
     * @throws NullPointerException if dg is null
     */
    int opApply(scope int delegate(size_t, string) dg) {
        if(dg is null)
            throw new NullPointerException("");
        int result = 0;
        size_t index = 0;
        while(hasToken && result == 0) {
            result = dg(index++, front());
            popFront();
        }
        return result;
    }

    /// Not supported by this range; always throws UnsupportedOperationException.
    string moveFront() {
        throw new UnsupportedOperationException("Remove not supported with this iterator");
    }

}