1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.text.QuoteUtil;
13 
14 import hunt.util.StringBuilder;
15 import hunt.Exceptions;
16 
17 import std.ascii;
18 import std.conv;
19 import std.range;
20 import std.string;
21 
22 
/**
 * Provide some consistent Http header value and Extension configuration parameter quoting support.
 * <p>
 * This is a port of Jetty's websocket QuoteUtil. It is kept separate from any generic quoted-string tokenizer because:
 * <ul>
 * <li>The ABNF defined extension parameter parsing requirements of RFC-6455 (WebSocket) are slightly different from the
 * ABNF parsing defined in RFC-2616 (HTTP/1.1).</li>
 * <li>Future HTTPbis ABNF changes for parsing should not impact this implementation.</li>
 * </ul>
 * All members are static; the class is only used as a namespace.
 */
class QuoteUtil {

    /**
     * ABNF from RFC 2616, RFC 822, and RFC 6455 specified characters requiring quoting.
     */
    enum string ABNF_REQUIRED_QUOTING = "\"'\\\n\r\t\f\b%+ ;=";

    /** Marker stored in the escape table for control characters that have no short escape. */
    private enum char UNICODE_TAG = cast(char)0xFF;

    /**
     * Escape table for the control characters 0 .. 31: either the letter of the short escape
     * (e.g. 'n' for a line feed) or UNICODE_TAG when a \u00XX escape has to be emitted.
     * Built at compile time, so no mutable global state and no module constructor is needed.
     */
    private static immutable char[32] escapes = buildEscapeTable();

    /** Builds the table stored in `escapes`. */
    private static char[32] buildEscapeTable() {
        char[32] table = UNICODE_TAG;
        // control characters that have a short (non-unicode) escape
        table['\b'] = 'b';
        table['\t'] = 't';
        table['\n'] = 'n';
        table['\f'] = 'f';
        table['\r'] = 'r';
        return table;
    }

    /**
     * Convert a single ASCII hex digit to its numeric value.
     *
     * @param b the hex digit ('0'-'9', 'a'-'f' or 'A'-'F')
     * @return the value of the digit, 0 .. 15
     * @throws IllegalArgumentException if b is not a hex digit
     */
    private static int dehex(byte b) {
        if ((b >= '0') && (b <= '9')) {
            return cast(byte) (b - '0');
        }
        if ((b >= 'a') && (b <= 'f')) {
            return cast(byte) ((b - 'a') + 10);
        }
        if ((b >= 'A') && (b <= 'F')) {
            return cast(byte) ((b - 'A') + 10);
        }
        throw new IllegalArgumentException("!hex:" ~ to!string(0xff & b, 16));
    }

    /**
     * Decode the four hex digits of a \uXXXX escape.
     *
     * @param str   the string being unescaped
     * @param start index of the first of the four hex digits
     * @return the 16 bit value encoded by the digits
     * @throws IllegalArgumentException if fewer than four characters remain or one of them is not a hex digit
     */
    private static uint hexQuad(string str, size_t start) {
        if (start + 4 > str.length) {
            throw new IllegalArgumentException("Incomplete \\u escape in: " ~ str);
        }
        uint value = 0;
        foreach (char digit; str[start .. start + 4]) {
            value = (value << 4) | cast(uint) dehex(cast(byte) digit);
        }
        return value;
    }

    /**
     * Remove quotes from a string, only if the input string start with and end with the same quote character.
     * <p>
     * Strings shorter than two characters (including null and empty strings) cannot be quoted and are
     * returned unchanged.
     *
     * @param str the string to remove surrounding quotes from
     * @return the de-quoted string
     */
    static string dequote(string str) {
        // A quoted string needs at least an opening and a closing quote.
        if (str.length < 2) {
            return str;
        }

        immutable char first = str[0];
        immutable bool isQuote = (first == '\'') || (first == '\"');
        if (isQuote && (str[$ - 1] == first)) {
            return str[1 .. $ - 1];
        }
        return str;
    }

    /**
     * Append str to buf, escaping double quotes, backslashes and control characters.
     * <p>
     * Control characters (below 32) are written either as their short escape (\b, \t, \n, \f, \r)
     * or as \u00XX. All other characters, including bytes of multi-byte UTF-8 sequences, are copied as-is.
     *
     * @param buf the StringBuilder to append to
     * @param str the string to escape
     */
    static void escape(StringBuilder buf, string str) {
        foreach (char c ; str) {
            if (c >= 32) {
                // non special character
                if ((c == '"') || (c == '\\')) {
                    buf.append('\\');
                }
                buf.append(c);
            } else {
                // special characters, requiring escaping
                char escaped = escapes[c];

                // is this a unicode escape?
                if (escaped == UNICODE_TAG) {
                    buf.append("\\u00");
                    if (c < 0x10) {
                        // keep the escape at exactly four hex digits
                        buf.append('0');
                    }
                    buf.append(to!string(cast(int)c, 16)); // hex
                } else {
                    // normal escape
                    buf.append('\\').append(escaped);
                }
            }
        }
    }

    /**
     * Simple quote of a string, escaping where needed.
     *
     * @param buf the StringBuilder to append to
     * @param str the string to quote
     */
    static void quote(StringBuilder buf, string str) {
        buf.append('"');
        escape(buf, str);
        buf.append('"');
    }

    /**
     * Append into buf the provided string, adding quotes if needed.
     * <p>
     * Quoting is determined if any of the characters in the <code>delim</code> are found in the input <code>str</code>.
     * A null or empty <code>str</code> appends nothing.
     *
     * @param buf   the buffer to append to
     * @param str   the string to possibly quote
     * @param delim the delimiter characters that will trigger automatic quoting
     */
    static void quoteIfNeeded(StringBuilder buf, string str, string delim) {
        // covers both null and empty input
        if (str.length == 0) {
            return;
        }

        // check for delimiters in input string
        foreach (char ch; str) {
            if (delim.indexOf(ch) >= 0) {
                // found a delimiter. we need to quote the whole string.
                quote(buf, str);
                return;
            }
        }

        // no special delimiters used, no quote needed.
        buf.append(str);
    }

    /**
     * Create an iterator of the input string, breaking apart the string at the provided delimiters, removing quotes and
     * trimming the parts of the string as needed.
     *
     * @param str    the input string to split apart
     * @param delims the delimiter characters to split the string on
     * @return the iterator of the parts of the string, trimmed, with quotes around the string part removed, and unescaped
     */
    static InputRange!string splitAt(string str, string delims) {
        return new DeQuotingStringIterator(str.strip(), delims);
    }

    /**
     * Resolve backslash escapes in a string.
     * <p>
     * Supported escapes are \n, \r, \t, \f, \b, \\, \/, \" and \uXXXX (four hex digits, with a following
     * \uXXXX low surrogate combined into a single code point). Any other escaped character is copied
     * without the backslash. A trailing lone backslash is dropped.
     *
     * @param str the string to unescape, may be null
     * @return the unescaped string, or null if str is null
     * @throws IllegalArgumentException if a \u escape is incomplete, contains a non-hex digit or
     *         denotes an unpaired surrogate
     */
    static string unescape(string str) {
        import std.utf : encode, isValidDchar;

        if (str is null) {
            // nothing there
            return null;
        }

        size_t len = str.length;
        if (len <= 1) {
            // impossible to be escaped
            return str;
        }

        StringBuilder ret = new StringBuilder(len);
        bool escaped = false;
        for (size_t i = 0; i < len; i++) {
            char c = str[i];
            if (escaped) {
                escaped = false;
                switch (c) {
                    case 'n':
                        ret.append('\n');
                        break;
                    case 'r':
                        ret.append('\r');
                        break;
                    case 't':
                        ret.append('\t');
                        break;
                    case 'f':
                        ret.append('\f');
                        break;
                    case 'b':
                        ret.append('\b');
                        break;
                    case '\\':
                        ret.append('\\');
                        break;
                    case '/':
                        ret.append('/');
                        break;
                    case '"':
                        ret.append('"');
                        break;
                    case 'u': {
                        // i is on the 'u'; the four hex digits follow it.
                        uint unit = hexQuad(str, i + 1);
                        i += 4; // now on the last hex digit, the loop increment moves past it
                        dchar codePoint = cast(dchar) unit;

                        // A high surrogate directly followed by an escaped low surrogate forms one code point.
                        if ((unit >= 0xD800) && (unit <= 0xDBFF) && (i + 6 < len)
                                && (str[i + 1] == '\\') && (str[i + 2] == 'u')) {
                            uint low = hexQuad(str, i + 3);
                            if ((low >= 0xDC00) && (low <= 0xDFFF)) {
                                codePoint = cast(dchar) (0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00));
                                i += 6;
                            }
                        }

                        if (!isValidDchar(codePoint)) {
                            throw new IllegalArgumentException("!unicode:" ~ to!string(cast(uint) codePoint, 16));
                        }

                        // The result is UTF-8, so the code point may need up to four code units.
                        char[4] utf8;
                        size_t count = encode(utf8, codePoint);
                        ret.append(utf8[0 .. count].idup);
                        break;
                    }
                    default:
                        ret.append(c);
                }
            } else if (c == '\\') {
                escaped = true;
            } else {
                ret.append(c);
            }
        }
        return ret.toString();
    }

    // NOTE: the two join() helpers below were never ported from the Java original and are kept for reference only.

    // static string join(Object[] objs, string delim) {
    //     if (objs is null) {
    //         return "";
    //     }
    //     StringBuilder ret = new StringBuilder();
    //     int len = objs.length;
    //     for (int i = 0; i < len; i++) {
    //         if (i > 0) {
    //             ret.append(delim);
    //         }
    //         if (objs[i] instanceof string) {
    //             ret.append('"').append(objs[i]).append('"');
    //         } else {
    //             ret.append(objs[i]);
    //         }
    //     }
    //     return ret.toString();
    // }

    // static string join(Collection<?> objs, string delim) {
    //     if (objs is null) {
    //         return "";
    //     }
    //     StringBuilder ret = new StringBuilder();
    //     bool needDelim = false;
    //     foreach (Object obj ; objs) {
    //         if (needDelim) {
    //             ret.append(delim);
    //         }
    //         if (obj instanceof string) {
    //             ret.append('"').append(obj).append('"');
    //         } else {
    //             ret.append(obj);
    //         }
    //         needDelim = true;
    //     }
    //     return ret.toString();
    // }
}
273 
/**
 * Input range that splits a string at a set of delimiter characters.
 * <p>
 * Delimiters inside single or double quoted sections do not split the string, and a backslash inside a
 * quoted section escapes the following character (the backslash itself is dropped). Each element is
 * trimmed and, when it is wrapped in a matching pair of quotes, de-quoted. Empty and whitespace-only
 * tokens are skipped, because an empty token cannot be distinguished from the end of the range.
 */
private class DeQuotingStringIterator : InputRange!string {
    /** Scanner state while collecting one token. */
    private enum State {
        START,
        TOKEN,
        QUOTE_SINGLE,
        QUOTE_DOUBLE
    }

    private string input;
    private string delims;
    /** Raw text of the current token (quotes still included). */
    private StringBuilder token;
    /** True once a non-whitespace character has been collected for the current token. */
    private bool hasToken = false;
    /** Index of the next unread character of input. */
    private size_t position = 0;

    /**
     * @param input  the string to split
     * @param delims the characters that separate tokens
     */
    this(string input, string delims) {
        this.input = input;
        this.delims = delims;
        size_t len = input.length;
        token = new StringBuilder(len > 1024 ? 512 : len / 2);

        // prime the range with the first token
        popFront();
    }

    /** Append c to the current token, ignoring whitespace before the first real character. */
    private void appendToken(char c) {
        if (!hasToken && isWhite(c)) {
            return; // skip whitespace at start of token.
        }
        token.append(c);
        hasToken = true;
    }

    /** @return true when no further token is available */
    bool empty() {
        return !hasToken;
    }

    /**
     * @return the current token, trimmed and with surrounding quotes removed
     * @throws NoSuchElementException if the range is empty
     */
    string front() @property {
        if (!hasToken) {
            throw new NoSuchElementException();
        }
        string ret = token.toString();
        return QuoteUtil.dequote(ret.strip());
    }

    /**
     * Scan the next token from the input. After the call, empty() tells whether one was found.
     */
    void popFront() {
        token.setLength(0);
        hasToken = false;

        State state = State.START;
        bool escape = false;
        immutable size_t inputLen = input.length;

        while (position < inputLen) {
            char c = input[position++];

            final switch (state) {
                case State.START: {
                    if (c == '\'') {
                        state = State.QUOTE_SINGLE;
                        appendToken(c);
                    } else if (c == '\"') {
                        state = State.QUOTE_DOUBLE;
                        appendToken(c);
                    } else if (delims.indexOf(c) >= 0) {
                        // Delimiter before any content: an empty token. Skip it instead of
                        // making the delimiter part of the next token.
                    } else {
                        appendToken(c);
                        state = State.TOKEN;
                    }
                    break;
                }
                case State.TOKEN: {
                    if (delims.indexOf(c) >= 0) {
                        if (hasToken) {
                            return;
                        }
                        // Only whitespace was seen so far. Returning here would report the range as
                        // empty and silently drop the rest of the input, so start over instead.
                        state = State.START;
                        break;
                    } else if (c == '\'') {
                        state = State.QUOTE_SINGLE;
                    } else if (c == '\"') {
                        state = State.QUOTE_DOUBLE;
                    }
                    appendToken(c);
                    break;
                }
                case State.QUOTE_SINGLE, State.QUOTE_DOUBLE: {
                    immutable char closing = (state == State.QUOTE_SINGLE) ? '\'' : '\"';
                    if (escape) {
                        // escaped character is taken literally
                        escape = false;
                        appendToken(c);
                    } else if (c == closing) {
                        appendToken(c);
                        state = State.TOKEN;
                    } else if (c == '\\') {
                        escape = true;
                    } else {
                        appendToken(c);
                    }
                    break;
                }
            }
        }
    }

    /**
     * foreach support. Iteration stops as soon as the delegate returns non-zero; the element that
     * caused the stop is not consumed.
     */
    int opApply(scope int delegate(string) dg) {
        if (dg is null)
            throw new NullPointerException("");
        int result = 0;
        while (hasToken) {
            result = dg(front());
            if (result != 0) {
                break;
            }
            popFront();
        }
        return result;
    }

    /**
     * foreach support with a running index. Iteration stops as soon as the delegate returns non-zero;
     * the element that caused the stop is not consumed.
     */
    int opApply(scope int delegate(size_t, string) dg) {
        if (dg is null)
            throw new NullPointerException("");
        int result = 0;
        size_t index = 0;
        while (hasToken) {
            result = dg(index++, front());
            if (result != 0) {
                break;
            }
            popFront();
        }
        return result;
    }

    /**
     * @return the current token; strings are immutable, so moving is the same as reading front
     * @throws NoSuchElementException if the range is empty
     */
    string moveFront() {
        return front();
    }

}