1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.util.MimeTypeUtils;
13 
14 
15 import hunt.collection;
16 import hunt.Exceptions;
17 import hunt.logging.ConsoleLogger;
18 import hunt.text;
19 import hunt.util.AcceptMimeType;
20 import hunt.util.MimeType;
21 import hunt.util.ObjectUtils;
22 
23 import std.algorithm;
24 import std.array;
25 import std.concurrency : initOnce;
26 import std.container.array;
27 import std.conv;
28 import std.file;
29 import std.path;
30 import std.range;
31 import std.stdio;
32 import std.string;
33 import std.uni;
34 
35 
/**
 * Helpers for resolving MIME types from file-name extensions and for parsing
 * the values of `Content-Type` and `Accept` headers.
 *
 * The default extension map is loaded lazily from `mime.properties` and the
 * charset tables from `encoding.properties`; both files are looked up in the
 * directory returned by `getResourcePrefix()`. An instance may additionally
 * carry its own extension map, which is consulted before the defaults.
 */
class MimeTypeUtils {

    /**
     * Returns the directory that holds the bundled resource files.
     *
     * `@DATA_PREFIX@` is a placeholder meant to be substituted at build time.
     * When it was left untouched, the `resources` directory next to the
     * running executable is used instead.
     */
    private static string getResourcePrefix() {
        mixin("string resourcePrefix = \"@DATA_PREFIX@\";");
        // The placeholder is assembled at run time: we don't want meson to
        // replace DATA_PREFIX here too, otherwise this would always be true.
        if (resourcePrefix == join(["@DATA", "_PREFIX@"])) {
            return dirName(thisExePath()) ~ "/resources";
        } else {
            return buildPath(resourcePrefix, "resources");
        }
    }

    /**
     * Returns the process-wide default extension-to-MIME map, loading it from
     * `mime.properties` on first use.
     */
    private static Map!(string, string) __dftMimeMap() {
        __gshared Map!(string, string) m;
        return initOnce!m({
            Map!(string, string) _m = new HashMap!(string, string)();
            auto resourcePath = getResourcePrefix();
            string resourceName = buildPath(resourcePath, "mime.properties");
            loadMimeProperties(resourceName, _m);
            return _m;
        }());
    }

    // MIME type -> charset tables; both are created by initializeEncodingsMap().
    private __gshared Map!(string, string) _inferredEncodings;
    private __gshared Map!(string, string) _assumedEncodings;

    /**
     * Builds `CACHE` and the two encoding tables exactly once.
     *
     * Every value of `MimeType.values` is registered in `CACHE` under its
     * string form (plus a `"; charset="` spelling when it carries a charset),
     * then `encoding.properties` is read from the resource directory.
     */
    private static void initializeEncodingsMap() {
        __gshared bool _isEncodingsLoaded = false;
        initOnce!(_isEncodingsLoaded)({
            _inferredEncodings = new HashMap!(string, string)();
            _assumedEncodings = new HashMap!(string, string)();

            foreach (MimeType type ; MimeType.values) {
                CACHE[type.toString()] = type;

                auto charset = type.toString().indexOf(";charset=");
                if (charset > 0) {
                    // Also accept the spelling with a space after the semicolon.
                    string alt = type.toString().replace(";charset=", "; charset=");
                    CACHE[alt] = type;
                }

                if (type.isCharsetAssumed())
                    _assumedEncodings.put(type.asString(), type.getCharsetString());
            }

            auto resourcePath = getResourcePrefix();
            string resourceName = buildPath(resourcePath, "encoding.properties");
            loadEncodingProperties(resourceName);
            return true;
        }());
    }

    /**
     * Known MIME types keyed by their string form.
     *
     * Filled by `initializeEncodingsMap()`; until that has run the table is
     * empty and `normalizeMimeType()` merely lower-cases its argument.
     */
    __gshared MimeType[string] CACHE;

    /**
     * Splits one line of a properties file into key and value at the first `=`.
     *
     * Params:
     *  line  = the raw line, with or without its line terminator
     *  key   = receives the stripped text before the first `=`
     *  value = receives the stripped text after the first `=` (may be empty)
     *
     * Returns: `false` for blank lines, comment lines (first non-blank
     * character `#` or `!`) and lines without a key or without `=`.
     */
    private static bool parsePropertyLine(string line, out string key, out string value) {
        string text = line.strip();
        if (text.empty || text[0] == '#' || text[0] == '!')
            return false;

        // Split on the FIRST '=' only, so values such as
        // "text/html;charset=utf-8" are kept intact.
        ptrdiff_t sep = text.indexOf('=');
        if (sep <= 0)
            return false;

        key = text[0 .. sep].strip();
        value = text[sep + 1 .. $].strip();
        return true;
    }

    /**
     * Reads `extension=mime/type` pairs from `fileName` into `m`.
     *
     * Keys are lower-cased and values passed through `normalizeMimeType()`.
     * A missing file is silently ignored (logged only with HUNT_DEBUG);
     * I/O errors are logged and otherwise swallowed.
     *
     * Params:
     *  fileName = path of the properties file
     *  m        = map that receives the entries
     */
    private static void loadMimeProperties(string fileName, Map!(string, string) m) {
        if (!exists(fileName)) {
            version(HUNT_DEBUG) warningf("File does not exist: %s", fileName);
            return;
        }

        version(HUNT_DEBUG) tracef("loading MIME properties from: %s", fileName);
        try {
            File f = File(fileName, "r");
            scope(exit) f.close();
            int count = 0;
            foreach (string line; f.byLineCopy()) {
                string key, value;
                if (!parsePropertyLine(line, key, value))
                    continue;

                count++;
                m.put(key.toLower(), normalizeMimeType(value));
            }

            if (m.size() == 0) {
                warningf("Empty mime types at %s", fileName);
            } else if (m.size() < count) {
                warningf("Duplicate or null mime-type extension in resource: %s", fileName);
            }
        } catch (Exception ex) {
            // Never use the exception text itself as the format string.
            warningf("%s", ex.toString());
        }
    }

    /**
     * Reads `mime/type=charset` pairs from `fileName`.
     *
     * A charset written with a leading `-` is stored (without the dash) in the
     * assumed-encodings table; every other entry goes to the inferred table.
     * A missing file is silently ignored (logged only with HUNT_DEBUG);
     * I/O errors are logged and otherwise swallowed.
     *
     * Params:
     *  fileName = path of the properties file
     */
    private static void loadEncodingProperties(string fileName) {
        if (!exists(fileName)) {
            version(HUNT_DEBUG) warningf("File does not exist: %s", fileName);
            return;
        }

        version(HUNT_DEBUG) tracef("loading encoding properties from: %s", fileName);
        try {
            File f = File(fileName, "r");
            scope(exit) f.close();
            int count = 0;
            foreach (string line; f.byLineCopy()) {
                string t, charset;
                if (!parsePropertyLine(line, t, charset))
                    continue;

                count++;
                version(HUNT_DEBUG) trace(t, " = ", charset);
                if (charset.startsWith("-"))
                    _assumedEncodings.put(t, charset[1 .. $]);
                else
                    _inferredEncodings.put(t, charset);
            }

            if (_inferredEncodings.size() == 0) {
                warningf("Empty encodings at %s", fileName);
            } else if (_inferredEncodings.size() + _assumedEncodings.size() < count) {
                warningf("Null or duplicate encodings in resource: %s", fileName);
            }
        } catch (Exception ex) {
            // Never use the exception text itself as the format string.
            warningf("%s", ex.toString());
        }
    }

    /**
     * Constructor. The per-instance extension map is created on demand.
     */
    this() {
    }

    /**
     * Returns the per-instance extension-to-MIME map, creating it on first use.
     */
    Map!(string, string) getMimeMap() {
        if (_mimeMap is null)
            _mimeMap = new HashMap!(string, string)();
        return _mimeMap;
    }

    // Per-instance overrides; stays null until getMimeMap() is first called.
    private Map!(string, string) _mimeMap;

    /**
     * Replaces the per-instance map with the (normalized) entries of `mimeMap`.
     *
     * The existing map object is cleared and refilled in place. Passing `null`
     * just empties it.
     *
     * @param mimeMap A Map of file extension to mime-type.
     */
    void setMimeMap(Map!(string, string) mimeMap) {
        // Normalize into a temporary first: the caller may hand us the very
        // map returned by getMimeMap(), which is cleared below.
        string[string] normalized;
        if (mimeMap !is null) {
            foreach (string k, string v ; mimeMap) {
                normalized[std.uni.toLower(k)] = normalizeMimeType(v);
            }
        }

        // getMimeMap() creates the map when it does not exist yet.
        Map!(string, string) target = getMimeMap();
        target.clear();
        foreach (k, v ; normalized) {
            target.put(k, v);
        }
    }

    /**
     * Get the MIME type by filename extension.
     * Lookup only the static default mime map.
     *
     * Every dot in the name is tried from left to right, so the longest
     * extension wins; the `*` entry is the fallback.
     *
     * @param filename A file name
     * @return MIME type matching the longest dot extension of the
     * file name.
     */
    static string getDefaultMimeByExtension(string filename) {
        Map!(string, string) defaults = __dftMimeMap();
        string type = null;

        if (filename != null) {
            ptrdiff_t i = -1;
            while (type == null) {
                i = filename.indexOf(".", i + 1);
                if (i < 0)
                    break;

                type = defaults.get(std.uni.toLower(filename[i + 1 .. $]));
            }
        }

        if (type == null)
            type = defaults.get("*");

        return type;
    }

    /**
     * Get the MIME type by filename extension.
     * Lookup the content and static default mime maps.
     *
     * For each candidate extension the per-instance map is consulted before
     * the defaults; the same order applies to the `*` fallback.
     *
     * @param filename A file name
     * @return MIME type matching the longest dot extension of the
     * file name.
     */
    string getMimeByExtension(string filename) {
        Map!(string, string) defaults = __dftMimeMap();
        string type = null;

        if (filename != null) {
            ptrdiff_t i = -1;
            while (type == null) {
                i = filename.indexOf(".", i + 1);
                if (i < 0)
                    break;

                string ext = std.uni.toLower(filename[i + 1 .. $]);
                if (_mimeMap !is null && _mimeMap.containsKey(ext))
                    type = _mimeMap.get(ext);
                if (type == null && defaults.containsKey(ext))
                    type = defaults.get(ext);
            }
        }

        if (type == null) {
            if (_mimeMap !is null && _mimeMap.containsKey("*"))
                type = _mimeMap.get("*");
            if (type == null && defaults.containsKey("*"))
                type = defaults.get("*");
        }

        return type;
    }

    /**
     * Set a mime mapping
     *
     * @param extension the extension
     * @param type      the mime type
     */
    void addMimeMapping(string extension, string type) {
        // Go through getMimeMap() so the map exists even on a fresh instance.
        getMimeMap().put(std.uni.toLower(extension), normalizeMimeType(type));
    }

    /**
     * Returns the distinct MIME types present in the default extension map.
     */
    static Set!string getKnownMimeTypes() {
        auto hs = new HashSet!(string)();
        foreach (v ; __dftMimeMap.byValue())
            hs.add(v);
        return hs;
    }

    /**
     * Returns the canonical string of a MIME type found in `CACHE`, or the
     * lower-cased input when the type is not cached.
     */
    private static string normalizeMimeType(string type) {
        MimeType t = CACHE.get(type, null);
        if (t !is null)
            return t.asString();

        return std.uni.toLower(type);
    }

    /**
     * Extracts the value of the `charset` parameter from a Content-Type value.
     *
     * The scan is a small state machine: state 0 is "outside a parameter",
     * states 1-8 match `;` + optional spaces + the lower-case word `charset`,
     * state 9 skips to the start of the value after `=`, and state 10 collects
     * the value until `;`, a space, or the closing quote. Quoted text met
     * before the charset value is skipped.
     *
     * @param value the Content-Type value
     * @return the result of `StringUtils.normalizeCharset` for the value found,
     * or `null` when the input is null/empty or has no charset parameter
     */
    static string getCharsetFromContentType(string value) {
        if (value == null)
            return null;
        int end = cast(int)value.length;
        int state = 0;
        int start = 0;
        bool quote = false;
        int i = 0;
        for (; i < end; i++) {
            char b = value[i];

            // Inside a quoted string that is not the charset value: skip.
            if (quote && state != 10) {
                if ('"' == b)
                    quote = false;
                continue;
            }

            // A semicolon (re)starts parameter-name matching.
            if (';' == b && state <= 8) {
                state = 1;
                continue;
            }

            switch (state) {
                case 0:
                    if ('"' == b) {
                        quote = true;
                        break;
                    }
                    break;

                case 1:
                    if ('c' == b) state = 2;
                    else if (' ' != b) state = 0;
                    break;
                case 2:
                    if ('h' == b) state = 3;
                    else state = 0;
                    break;
                case 3:
                    if ('a' == b) state = 4;
                    else state = 0;
                    break;
                case 4:
                    if ('r' == b) state = 5;
                    else state = 0;
                    break;
                case 5:
                    if ('s' == b) state = 6;
                    else state = 0;
                    break;
                case 6:
                    if ('e' == b) state = 7;
                    else state = 0;
                    break;
                case 7:
                    if ('t' == b) state = 8;
                    else state = 0;
                    break;

                case 8:
                    if ('=' == b) state = 9;
                    else if (' ' != b) state = 0;
                    break;

                case 9:
                    if (' ' == b)
                        break;
                    if ('"' == b) {
                        // Quoted value: it starts after the opening quote.
                        quote = true;
                        start = i + 1;
                        state = 10;
                        break;
                    }
                    start = i;
                    state = 10;
                    break;

                case 10:
                    if (!quote && (';' == b || ' ' == b) ||
                            (quote && '"' == b))
                        return StringUtils.normalizeCharset(value, start, i - start);
                    break;

                default: break;
            }
        }

        // The value ran to the end of the input.
        if (state == 10)
            return StringUtils.normalizeCharset(value, start, i - start);

        return null;
    }

    /**
     * Access a mutable map of mime type to the charset inferred from that content type.
     * An inferred encoding is used when encoding/decoding a stream and is
     * explicitly set in any metadata (eg Content-MimeType).
     *
     * @return Map of mime type to charset
     */
    static Map!(string, string) getInferredEncodings() {
        initializeEncodingsMap();
        return _inferredEncodings;
    }

    /**
     * Access a mutable map of mime type to the charset assumed for that content type.
     * An assumed encoding is used when encoding/decoding a stream, but is not
     * explicitly set in any metadata (eg Content-MimeType).
     *
     * @return Map of mime type to charset
     */
    static Map!(string, string) getAssumedEncodings() {
        initializeEncodingsMap();
        return _assumedEncodings;
    }

    /**
     * Looks up `contentType` in the inferred-encodings table.
     */
    static string getCharsetInferredFromContentType(string contentType) {
        return getInferredEncodings().get(contentType);
    }

    /**
     * Looks up `contentType` in the assumed-encodings table.
     */
    static string getCharsetAssumedFromContentType(string contentType) {
        return getAssumedEncodings().get(contentType);
    }

    /**
     * Returns a Content-Type value with its `charset` parameter removed.
     *
     * States 0-9 mirror `getCharsetFromContentType`: they locate
     * `;` + `charset` + `=`. While in state 0, `start` tracks the last
     * non-space character seen. Once the charset value begins, a builder is
     * seeded from the text preceding the parameter
     * (NOTE(review): relies on `builder.append(value, 0, start + 1)` copying
     * the leading `start + 1` characters - confirm against StringBuilder).
     * State 10 drops the charset value; state 11 copies whatever follows the
     * next `;`, omitting unquoted spaces.
     *
     * @param value the Content-Type value
     * @return the rebuilt value, or `value` itself when no charset parameter
     * was found
     */
    static string getContentTypeWithoutCharset(string value) {
        int end = cast(int)value.length;
        int state = 0;
        int start = 0;
        bool quote = false;
        int i = 0;
        StringBuilder builder = null;
        for (; i < end; i++) {
            char b = value[i];

            if ('"' == b) {
                quote = !quote;

                switch (state) {
                    case 11:
                        builder.append(b);
                        break;
                    case 10:
                        break;
                    case 9:
                        // Quoted charset value begins: keep the prefix only.
                        builder = new StringBuilder();
                        builder.append(value, 0, start + 1);
                        state = 10;
                        break;
                    default:
                        start = i;
                        state = 0;
                }
                continue;
            }

            if (quote) {
                // Quoted text is copied verbatim unless it is the charset value.
                if (builder !is null && state != 10)
                    builder.append(b);
                continue;
            }

            switch (state) {
                case 0:
                    if (';' == b)
                        state = 1;
                    else if (' ' != b)
                        start = i;
                    break;

                case 1:
                    if ('c' == b) state = 2;
                    else if (' ' != b) state = 0;
                    break;
                case 2:
                    if ('h' == b) state = 3;
                    else state = 0;
                    break;
                case 3:
                    if ('a' == b) state = 4;
                    else state = 0;
                    break;
                case 4:
                    if ('r' == b) state = 5;
                    else state = 0;
                    break;
                case 5:
                    if ('s' == b) state = 6;
                    else state = 0;
                    break;
                case 6:
                    if ('e' == b) state = 7;
                    else state = 0;
                    break;
                case 7:
                    if ('t' == b) state = 8;
                    else state = 0;
                    break;
                case 8:
                    if ('=' == b) state = 9;
                    else if (' ' != b) state = 0;
                    break;

                case 9:
                    if (' ' == b)
                        break;
                    builder = new StringBuilder();
                    builder.append(value, 0, start + 1);
                    state = 10;
                    break;

                case 10:
                    if (';' == b) {
                        builder.append(b);
                        state = 11;
                    }
                    break;

                case 11:
                    if (' ' != b)
                        builder.append(b);
                    break;

                default: break;
            }
        }
        if (builder is null)
            return value;
        return builder.toString();
    }

    /**
     * Returns the media type of a Content-Type value, i.e. the text before the
     * first `;`, with surrounding whitespace removed.
     *
     * @param contentType the Content-Type value
     * @return the media type, or `null` when the input is empty or contains no
     * media type
     */
    static string getContentTypeMIMEType(string contentType) {
        if (contentType.empty)
            return null;

        string[] strings = StringUtils.split(contentType, ";");
        // Guard against a split result without tokens (e.g. input ";").
        if (strings.length == 0)
            return null;
        return strings[0].strip();
    }

    /**
     * Lists the media ranges of an Accept header value in header order,
     * without their parameters.
     *
     * @param accept the Accept header value
     * @return the media ranges; an empty list for empty input
     */
    static List!string getAcceptMIMETypes(string accept) {
        if (accept.empty)
            return new EmptyList!string(); // Collections.emptyList();

        List!string list = new ArrayList!string();
        string[] strings = StringUtils.split(accept, ",");
        foreach (string str ; strings) {
            string[] s = StringUtils.split(str, ";");
            // Skip entries that contain no media range at all.
            if (s.length == 0)
                continue;
            list.add(s[0].strip());
        }
        return list;
    }

    /**
     * Parses an Accept header value into `AcceptMimeType` objects ordered by
     * descending quality.
     *
     * @param accept the Accept header value
     * @return the parsed entries; empty for empty input
     */
    static AcceptMimeType[] parseAcceptMIMETypes(string accept) {
        if (accept.empty)
            return [];

        string[] arr = StringUtils.split(accept, ",");
        return apply(arr);
    }

    /**
     * Converts the comma-separated pieces of an Accept header into
     * `AcceptMimeType` objects.
     *
     * Pieces that are blank or lack a `type/subtype` pair are skipped. The
     * quality is taken from the `q` parameter wherever it appears among the
     * parameters; an unparsable quality leaves the object's default untouched.
     * The result is sorted by descending quality and entries of equal quality
     * keep their header order.
     *
     * Params:
     *  stream = the pieces of the header, one media range each
     *
     * Returns: the parsed entries, highest quality first
     */
    private static AcceptMimeType[] apply(string[] stream) {
        AcceptMimeType[] result;

        foreach (string s; stream) {
            string entry = strip(s);
            if (entry.empty) continue;

            string[] segments = StringUtils.split(entry, ';');
            if (segments.length == 0) continue;

            // parse the MIME type
            string[] mimeType = StringUtils.split(segments[0].strip(), '/');
            if (mimeType.length < 2) {
                version(HUNT_DEBUG) warningf("Ignoring malformed media range: %s", entry);
                continue;
            }

            string parentType = mimeType[0].strip();
            string childType = mimeType[1].strip();

            AcceptMimeType acceptMIMEType = new AcceptMimeType();
            acceptMIMEType.setParentType(parentType);
            acceptMIMEType.setChildType(childType);
            if (parentType == "*") {
                if (childType == "*") {
                    acceptMIMEType.setMatchType(AcceptMimeMatchType.ALL);
                } else {
                    acceptMIMEType.setMatchType(AcceptMimeMatchType.CHILD);
                }
            } else {
                if (childType == "*") {
                    acceptMIMEType.setMatchType(AcceptMimeMatchType.PARENT);
                } else {
                    acceptMIMEType.setMatchType(AcceptMimeMatchType.EXACT);
                }
            }

            // parse the quality: find the parameter actually named "q"
            foreach (string param; segments[1 .. $]) {
                string[] qualityKV = StringUtils.split(param, '=');
                if (qualityKV.length < 2 || std.uni.toLower(qualityKV[0].strip()) != "q")
                    continue;

                try {
                    float q = to!float(qualityKV[1].strip());
                    // q == q is false only for NaN, which would break the ordering.
                    if (q == q)
                        acceptMIMEType.setQuality(q);
                } catch (ConvException ex) {
                    version(HUNT_DEBUG) warningf("Ignoring malformed quality in: %s", entry);
                }
                break;
            }

            result ~= acceptMIMEType;
        }

        // The greater quality is first; a stable sort keeps header order for ties
        // and is safe for an empty result.
        std.algorithm.sort!((a, b) => a.getQuality() > b.getQuality(), SwapStrategy.stable)(result);

        return result;
    }
}