1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.util.MimeTypeUtils;
13 
14 
15 import hunt.collection;
16 import hunt.Exceptions;
17 import hunt.logging;
18 import hunt.text;
19 import hunt.util.AcceptMimeType;
20 import hunt.util.MimeType;
21 import hunt.util.ObjectUtils;
22 
23 import std.algorithm;
24 import std.array;
25 import std.ascii;
26 import std.concurrency : initOnce;
27 import std.container.array;
28 import std.conv;
29 import std.file;
30 import std.path;
31 import std.range;
32 import std.stdio;
33 import std.string;
34 import std.uni;
35 
36 
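/**
 * Utilities for resolving MIME types from file names and for parsing the
 * media-type and charset parts of Content-Type and Accept header values.
 */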
40 class MimeTypeUtils {
41 
    // Text of "encoding.properties", embedded at compile time through a D
    // string import expression.
    enum EncodingProperties = import("encoding.properties");
    // Text of "mime.properties", embedded at compile time the same way.
    enum MimeProperties = import("mime.properties");
44 
45     // Allow installing resources into a shared dir
46     private static string getResourcePrefix() {
47         mixin("string resourcePrefix = \"@DATA_PREFIX@\";");
48         // We don't want meson to replace the CONF_PREFIX here too,
49         // otherwise this would always be true.
50         if (resourcePrefix == join(["@DATA", "_PREFIX@"])) {
51             return dirName(thisExePath()) ~ "/resources";
52         } else {
53             return buildPath(resourcePrefix, "resources");
54         }
55     }
56 
57     // private __gshared static ByteBuffer[string] TYPES; // = new ArrayTrie<>(512);
58     private static Map!(string, string) __dftMimeMap() {
59         __gshared Map!(string, string) m;
60         return initOnce!m({
61             Map!(string, string) _m = new HashMap!(string, string)();
62             // auto resourcePath = getResourcePrefix();
63             // string resourceName = buildPath(resourcePath, "mime.properties");
64             // loadMimeProperties(resourceName, _m);
65             string[] lines = split(MimeProperties, newline);
66             foreach(string line; lines) {
67                 string[] parts = split(line, "=");
68                 if(parts.length < 2) continue;
69 
70                 string key = parts[0].strip().toLower();
71                 string value = normalizeMimeType(parts[1].strip());
72                 // trace(key, " = ", value);
73                 _m.put(key, value);                
74             }
75             return _m;
76         }());
77     }
78 
    // Mime type -> charset tables. Both are created by initializeEncodingsMap()
    // and receive their entries through addEncoding().
    private __gshared Map!(string, string) _inferredEncodings;
    private __gshared Map!(string, string) _assumedEncodings;
81 
82     private static void initializeEncodingsMap() {
83         __gshared bool _isEncodingsLoaded = false;
84         initOnce!(_isEncodingsLoaded)({
85             _inferredEncodings = new HashMap!(string, string)();
86             _assumedEncodings = new HashMap!(string, string)();
87 
88             foreach (MimeType type ; MimeType.values) {
89                 CACHE[type.toString()] = type;
90                 // TYPES[type.toString()] = type.asBuffer();
91 
92                 auto charset = type.toString().indexOf(";charset=");
93                 if (charset > 0) {
94                     string alt = type.toString().replace(";charset=", "; charset=");
95                     CACHE[alt] = type;
96                     // TYPES[alt] = type.asBuffer();
97                 }
98 
99                 if (type.isCharsetAssumed())
100                     _assumedEncodings.put(type.asString(), type.getCharsetString());
101             }
102 
103             // auto resourcePath = getResourcePrefix();
104             // string resourceName = buildPath(resourcePath, "encoding.properties");
105             // loadEncodingProperties(resourceName);
106 
107             string[] lines = split(EncodingProperties, newline);
108             foreach(string line; lines) {
109                 addEncoding(line);
110             }
111             return true;
112         }());
113     }
114 
    // Lookup table from a mime type's string form to its MimeType. Filled by
    // initializeEncodingsMap() and consulted by normalizeMimeType().
    __gshared MimeType[string] CACHE; 
116 
117 
118     private static void loadMimeProperties(string fileName, Map!(string, string) m) {
119         if(!exists(fileName)) {
120             version(HUNT_DEBUG) warningf("File does not exist: %s", fileName);
121             return;
122         }
123 
124         void doLoad() {
125             version(HUNT_DEBUG) tracef("loading MIME properties from: %s", fileName);
126             try {
127                 File f = File(fileName, "r");
128                 scope(exit) f.close();
129                 string line;
130                 int count = 0;
131                 while((line = f.readln()) !is null) {
132                     string[] parts = split(line, "=");
133                     if(parts.length < 2) continue;
134 
135                     count++;
136                     string key = parts[0].strip().toLower();
137                     string value = normalizeMimeType(parts[1].strip());
138                     // trace(key, " = ", value);
139                     m.put(key, value);
140                 }
141 
142                 if (m.size() == 0) {
143                     warningf("Empty mime types at %s", fileName);
144                 } else if (m.size() < count) {
145                     warningf("Duplicate or null mime-type extension in resource: %s", fileName);
146                 }            
147             } catch(Exception ex) {
148                 warningf(ex.toString());
149             }
150         }
151 
152         doLoad();
153     }
154 
155     private static void loadEncodingProperties(string fileName) {
156         if(!exists(fileName)) {
157             version(HUNT_DEBUG) warningf("File does not exist: %s", fileName);
158             return;
159         }
160 
161         version(HUNT_DEBUG) tracef("loading MIME properties from: %s", fileName);
162         try {
163             File f = File(fileName, "r");
164             scope(exit) f.close();
165             string line;
166             int count = 0;
167             while((line = f.readln()) !is null) {
168                 addEncoding(line);
169             }
170 
171             // if (_inferredEncodings.size() == 0) {
172             //     warningf("Empty encodings in resource: %s", fileName);
173             // } else if (_inferredEncodings.size() + _assumedEncodings.size() < count) {
174             //     warningf("Null or duplicate encodings in resource: %s", fileName);
175             // }            
176         } catch(Exception ex) {
177             warningf(ex.toString());
178         }
179     }
180 
    /**
     * Constructor.
     *
     * Does no work; the per-instance mime map is left unset here and is created
     * on demand by getMimeMap().
     */
    this() {
    }
186 
187     Map!(string, string) getMimeMap() {
188         if(_mimeMap is null)
189             _mimeMap = new HashMap!(string, string)();
190         return _mimeMap;
191     }
192 
193     private Map!(string, string) _mimeMap; 
194 
195     /**
196      * @param mimeMap A Map of file extension to mime-type.
197      */
198     void setMimeMap(Map!(string, string) mimeMap) {
199         _mimeMap.clear();
200         if (mimeMap !is null) {
201             foreach (string k, string v ; mimeMap) {
202                 _mimeMap.put(std.uni.toLower(k), normalizeMimeType(v));
203             }
204         }
205     }
206 
207     /**
208      * Get the MIME type by filename extension.
209      * Lookup only the static default mime map.
210      *
211      * @param filename A file name
212      * @return MIME type matching the longest dot extension of the
213      * file name.
214      */
215     static string getDefaultMimeByExtension(string filename) {
216         string type = null;
217 
218         if (filename != null) {
219             ptrdiff_t i = -1;
220             while (type == null) {
221                 i = filename.indexOf(".", i + 1);
222 
223                 if (i < 0 || i >= filename.length)
224                     break;
225 
226                 string ext = std.uni.toLower(filename[i + 1 .. $]);
227                 if (type == null)
228                     type = __dftMimeMap().get(ext);
229             }
230         }
231 
232         if (type == null) {
233             if (type == null)
234                 type = __dftMimeMap().get("*");
235         }
236 
237         return type;
238     }
239 
240     /**
241      * Get the MIME type by filename extension.
242      * Lookup the content and static default mime maps.
243      *
244      * @param filename A file name
245      * @return MIME type matching the longest dot extension of the
246      * file name.
247      */
248     string getMimeByExtension(string filename) {
249         string type = null;
250 
251         if (filename != null) {
252             ptrdiff_t i = -1;
253             while (type == null) {
254                 i = filename.indexOf(".", i + 1);
255 
256                 if (i < 0 || i >= filename.length)
257                     break;
258 
259                 string ext = std.uni.toLower(filename[i + 1 .. $]);
260                 if (_mimeMap !is null && _mimeMap.containsKey(ext))
261                     type = _mimeMap.get(ext);
262                 if (type == null && __dftMimeMap.containsKey(ext))
263                     type = __dftMimeMap.get(ext);
264             }
265         }
266 
267         if (type == null) {
268             if (_mimeMap !is null && _mimeMap.containsKey("*"))
269                 type = _mimeMap.get("*");
270             if (type == null && __dftMimeMap.containsKey("*"))
271                 type = __dftMimeMap.get("*");
272         }
273 
274         return type;
275     }
276 
277     /**
278      * Set a mime mapping
279      *
280      * @param extension the extension
281      * @param type      the mime type
282      */
283     void addMimeMapping(string extension, string type) {
284         _mimeMap.put(std.uni.toLower(extension), normalizeMimeType(type));
285     }
286 
287     static Set!string getKnownMimeTypes() {
288         auto hs = new HashSet!(string)();
289         foreach(v ; __dftMimeMap.byValue())
290             hs.add(v);
291         return hs;
292     }
293 
294     private static string normalizeMimeType(string type) {
295         MimeType t = CACHE.get(type, null);
296         if (t !is null)
297             return t.asString();
298 
299         return std.uni.toLower(type);
300     }
301 
302     static string getCharsetFromContentType(string value) {
303         if (value == null)
304             return null;
305         int end = cast(int)value.length;
306         int state = 0;
307         int start = 0;
308         bool quote = false;
309         int i = 0;
310         for (; i < end; i++) {
311             char b = value[i];
312 
313             if (quote && state != 10) {
314                 if ('"' == b)
315                     quote = false;
316                 continue;
317             }
318 
319             if (';' == b && state <= 8) {
320                 state = 1;
321                 continue;
322             }
323 
324             switch (state) {
325                 case 0:
326                     if ('"' == b) {
327                         quote = true;
328                         break;
329                     }
330                     break;
331 
332                 case 1:
333                     if ('c' == b) state = 2;
334                     else if (' ' != b) state = 0;
335                     break;
336                 case 2:
337                     if ('h' == b) state = 3;
338                     else state = 0;
339                     break;
340                 case 3:
341                     if ('a' == b) state = 4;
342                     else state = 0;
343                     break;
344                 case 4:
345                     if ('r' == b) state = 5;
346                     else state = 0;
347                     break;
348                 case 5:
349                     if ('s' == b) state = 6;
350                     else state = 0;
351                     break;
352                 case 6:
353                     if ('e' == b) state = 7;
354                     else state = 0;
355                     break;
356                 case 7:
357                     if ('t' == b) state = 8;
358                     else state = 0;
359                     break;
360 
361                 case 8:
362                     if ('=' == b) state = 9;
363                     else if (' ' != b) state = 0;
364                     break;
365 
366                 case 9:
367                     if (' ' == b)
368                         break;
369                     if ('"' == b) {
370                         quote = true;
371                         start = i + 1;
372                         state = 10;
373                         break;
374                     }
375                     start = i;
376                     state = 10;
377                     break;
378 
379                 case 10:
380                     if (!quote && (';' == b || ' ' == b) ||
381                             (quote && '"' == b))
382                         return StringUtils.normalizeCharset(value, start, i - start);
383                     break;
384 
385                 default: break;
386             }
387         }
388 
389         if (state == 10)
390             return StringUtils.normalizeCharset(value, start, i - start);
391 
392         return null;
393     }
394 
395     static void addEncoding(string encoding) {
396         string[] parts = split(encoding, "=");
397         if(parts.length < 2) {
398             return;
399         }
400 
401         // count++;
402         string t = parts[0].strip();
403         string charset = parts[1].strip();
404         version(HUNT_DEBUG) trace(t, " = ", charset);
405         if(charset.startsWith("-"))
406             _assumedEncodings.put(t, charset[1..$]);
407         else
408             _inferredEncodings.put(t, charset);        
409     }
410 
411     /**
412      * Access a mutable map of mime type to the charset inferred from that content type.
413      * An inferred encoding is used by when encoding/decoding a stream and is
414      * explicitly set in any metadata (eg Content-MimeType).
415      *
416      * @return Map of mime type to charset
417      */
418     static Map!(string, string) getInferredEncodings() {
419         initializeEncodingsMap();
420         return _inferredEncodings;
421     }
422 
423     /**
424      * Access a mutable map of mime type to the charset assumed for that content type.
425      * An assumed encoding is used by when encoding/decoding a stream, but is not
426      * explicitly set in any metadata (eg Content-MimeType).
427      *
428      * @return Map of mime type to charset
429      */
430     static Map!(string, string) getAssumedEncodings() {
431         initializeEncodingsMap();
432         return _assumedEncodings;
433     }
434 
435     static string getCharsetInferredFromContentType(string contentType) {
436         return getInferredEncodings().get(contentType);
437     }
438 
439     static string getCharsetAssumedFromContentType(string contentType) {
440         return getAssumedEncodings().get(contentType);
441     }
442 
443     static string getContentTypeWithoutCharset(string value) {
444         int end = cast(int)value.length;
445         int state = 0;
446         int start = 0;
447         bool quote = false;
448         int i = 0;
449         StringBuilder builder = null;
450         for (; i < end; i++) {
451             char b = value[i];
452 
453             if ('"' == b) {
454                 quote = !quote;
455 
456                 switch (state) {
457                     case 11:
458                         builder.append(b);
459                         break;
460                     case 10:
461                         break;
462                     case 9:
463                         builder = new StringBuilder();
464                         builder.append(value, 0, start + 1);
465                         state = 10;
466                         break;
467                     default:
468                         start = i;
469                         state = 0;
470                 }
471                 continue;
472             }
473 
474             if (quote) {
475                 if (builder !is null && state != 10)
476                     builder.append(b);
477                 continue;
478             }
479 
480             switch (state) {
481                 case 0:
482                     if (';' == b)
483                         state = 1;
484                     else if (' ' != b)
485                         start = i;
486                     break;
487 
488                 case 1:
489                     if ('c' == b) state = 2;
490                     else if (' ' != b) state = 0;
491                     break;
492                 case 2:
493                     if ('h' == b) state = 3;
494                     else state = 0;
495                     break;
496                 case 3:
497                     if ('a' == b) state = 4;
498                     else state = 0;
499                     break;
500                 case 4:
501                     if ('r' == b) state = 5;
502                     else state = 0;
503                     break;
504                 case 5:
505                     if ('s' == b) state = 6;
506                     else state = 0;
507                     break;
508                 case 6:
509                     if ('e' == b) state = 7;
510                     else state = 0;
511                     break;
512                 case 7:
513                     if ('t' == b) state = 8;
514                     else state = 0;
515                     break;
516                 case 8:
517                     if ('=' == b) state = 9;
518                     else if (' ' != b) state = 0;
519                     break;
520 
521                 case 9:
522                     if (' ' == b)
523                         break;
524                     builder = new StringBuilder();
525                     builder.append(value, 0, start + 1);
526                     state = 10;
527                     break;
528 
529                 case 10:
530                     if (';' == b) {
531                         builder.append(b);
532                         state = 11;
533                     }
534                     break;
535 
536                 case 11:
537                     if (' ' != b)
538                         builder.append(b);
539                     break;
540                 
541                 default: break;
542             }
543         }
544         if (builder is null)
545             return value;
546         return builder.toString();
547 
548     }
549 
550     static string getContentTypeMIMEType(string contentType) {
551         if (contentType.empty) 
552             return null;
553 
554         // parsing content-type
555         string[] strings = StringUtils.split(contentType, ";");
556         return strings[0];
557     }
558 
559     static List!string getAcceptMIMETypes(string accept) {
560         if(accept.empty) 
561             new EmptyList!string(); // Collections.emptyList();
562 
563         List!string list = new ArrayList!string();
564         // parsing accept
565         string[] strings = StringUtils.split(accept, ",");
566         foreach (string str ; strings) {
567             string[] s = StringUtils.split(str, ";");
568             list.add(s[0].strip());
569         }
570         return list;
571     }
572 
573     static AcceptMimeType[] parseAcceptMIMETypes(string accept) {
574 
575         if(accept.empty) 
576             return [];
577 
578         string[] arr = StringUtils.split(accept, ",");
579         return apply(arr);
580     }
581 
582     private static AcceptMimeType[] apply(string[] stream) {
583 
584         Array!AcceptMimeType arr;
585 
586         foreach(string s; stream) {
587             string type = strip(s);
588             if(type.empty) continue;
589             string[] mimeTypeAndQuality = StringUtils.split(type, ';');
590             AcceptMimeType acceptMIMEType = new AcceptMimeType();
591             
592             // parse the MIME type
593             string[] mimeType = StringUtils.split(mimeTypeAndQuality[0].strip(), '/');
594             string parentType = mimeType[0].strip();
595             string childType = mimeType[1].strip();
596             acceptMIMEType.setParentType(parentType);
597             acceptMIMEType.setChildType(childType);
598             if (parentType == "*") {
599                 if (childType == "*") {
600                     acceptMIMEType.setMatchType(AcceptMimeMatchType.ALL);
601                 } else {
602                     acceptMIMEType.setMatchType(AcceptMimeMatchType.CHILD);
603                 }
604             } else {
605                 if (childType == "*") {
606                     acceptMIMEType.setMatchType(AcceptMimeMatchType.PARENT);
607                 } else {
608                     acceptMIMEType.setMatchType(AcceptMimeMatchType.EXACT);
609                 }
610             }
611 
612             // parse the quality
613             if (mimeTypeAndQuality.length > 1) {
614                 string q = mimeTypeAndQuality[1];
615                 string[] qualityKV = StringUtils.split(q, '=');
616                 acceptMIMEType.setQuality(to!float(qualityKV[1].strip()));
617             }
618             arr.insertBack(acceptMIMEType);
619         }
620 
621         for(size_t i=0; i<arr.length-1; i++) {
622             for(size_t j=i+1; j<arr.length; j++) {
623                 AcceptMimeType a = arr[i];
624                 AcceptMimeType b = arr[j];
625                 if(b.getQuality() > a.getQuality()) {   // The greater quality is first.
626                     arr[i] = b; arr[j] = a;
627                 }
628             }
629         }
630 
631         return arr.array();
632     }
633 }