1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.util.MimeTypeUtils;
13 
14 
15 import hunt.collection;
16 import hunt.Exceptions;
17 import hunt.logging.ConsoleLogger;
18 import hunt.text;
19 import hunt.util.AcceptMimeType;
20 import hunt.util.MimeType;
21 import hunt.util.ObjectUtils;
22 
23 import std.algorithm;
24 import std.array;
25 import std.ascii;
26 import std.concurrency : initOnce;
27 import std.container.array;
28 import std.conv;
29 import std.file;
30 import std.path;
31 import std.range;
32 import std.stdio;
33 import std.string;
34 import std.uni;
35 
36 
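/**
 * Utilities for mapping file-name extensions to MIME types and for extracting
 * MIME type and charset information from Content-Type and Accept header values.
 */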
40 class MimeTypeUtils {
41 
    // Text of the "encoding.properties" and "mime.properties" resources, embedded
    // at compile time through D string-import expressions. The lookup tables in
    // this class are parsed from these strings rather than from files on disk.
    enum EncodingProperties = import("encoding.properties");
    enum MimeProperties = import("mime.properties");
44 
45     // Allow installing resources into a shared dir
46     private static string getResourcePrefix() {
47         mixin("string resourcePrefix = \"@DATA_PREFIX@\";");
48         // We don't want meson to replace the CONF_PREFIX here too,
49         // otherwise this would always be true.
50         if (resourcePrefix == join(["@DATA", "_PREFIX@"])) {
51             return dirName(thisExePath()) ~ "/resources";
52         } else {
53             return buildPath(resourcePrefix, "resources");
54         }
55     }
56 
57     // private __gshared static ByteBuffer[string] TYPES; // = new ArrayTrie<>(512);
58     private static Map!(string, string) __dftMimeMap() {
59         __gshared Map!(string, string) m;
60         return initOnce!m({
61             Map!(string, string) _m = new HashMap!(string, string)();
62             // auto resourcePath = getResourcePrefix();
63             // string resourceName = buildPath(resourcePath, "mime.properties");
64             // loadMimeProperties(resourceName, _m);
65             string[] lines = split(MimeProperties, newline);
66             foreach(string line; lines) {
67                 string[] parts = split(line, "=");
68                 if(parts.length < 2) continue;
69 
70                 string key = parts[0].strip().toLower();
71                 string value = normalizeMimeType(parts[1].strip());
72                 // trace(key, " = ", value);
73                 _m.put(key, value);                
74             }
75             return _m;
76         }());
77     }
78 
79     private __gshared Map!(string, string) _inferredEncodings;
80     private __gshared Map!(string, string) _assumedEncodings;
81 
82     private static void initializeEncodingsMap() {
83         __gshared bool _isEncodingsLoaded = false;
84         initOnce!(_isEncodingsLoaded)({
85             _inferredEncodings = new HashMap!(string, string)();
86             _assumedEncodings = new HashMap!(string, string)();
87 
88             foreach (MimeType type ; MimeType.values) {
89                 CACHE[type.toString()] = type;
90                 // TYPES[type.toString()] = type.asBuffer();
91 
92                 auto charset = type.toString().indexOf(";charset=");
93                 if (charset > 0) {
94                     string alt = type.toString().replace(";charset=", "; charset=");
95                     CACHE[alt] = type;
96                     // TYPES[alt] = type.asBuffer();
97                 }
98 
99                 if (type.isCharsetAssumed())
100                     _assumedEncodings.put(type.asString(), type.getCharsetString());
101             }
102 
103             // auto resourcePath = getResourcePrefix();
104             // string resourceName = buildPath(resourcePath, "encoding.properties");
105             // loadEncodingProperties(resourceName);
106 
107             string[] lines = split(EncodingProperties, newline);
108             foreach(string line; lines) {
109                 addEncoding(line);
110             }
111             return true;
112         }());
113     }
114 
115     __gshared MimeType[string] CACHE; 
116 
117 
118     private static void loadMimeProperties(string fileName, Map!(string, string) m) {
119         if(!exists(fileName)) {
120             version(HUNT_DEBUG) warningf("File does not exist: %s", fileName);
121             return;
122         }
123 
124         void doLoad() {
125             version(HUNT_DEBUG) tracef("loading MIME properties from: %s", fileName);
126             try {
127                 File f = File(fileName, "r");
128                 scope(exit) f.close();
129                 string line;
130                 int count = 0;
131                 while((line = f.readln()) !is null) {
132                     string[] parts = split(line, "=");
133                     if(parts.length < 2) continue;
134 
135                     count++;
136                     string key = parts[0].strip().toLower();
137                     string value = normalizeMimeType(parts[1].strip());
138                     // trace(key, " = ", value);
139                     m.put(key, value);
140                 }
141 
142                 if (m.size() == 0) {
143                     warningf("Empty mime types at %s", fileName);
144                 } else if (m.size() < count) {
145                     warningf("Duplicate or null mime-type extension in resource: %s", fileName);
146                 }            
147             } catch(Exception ex) {
148                 warningf(ex.toString());
149             }
150         }
151 
152         doLoad();
153         // import std.parallelism;
154         // auto t = task(&doLoad);
155         // t.executeInNewThread();
156     }
157 
158     private static void loadEncodingProperties(string fileName) {
159         if(!exists(fileName)) {
160             version(HUNT_DEBUG) warningf("File does not exist: %s", fileName);
161             return;
162         }
163 
164         version(HUNT_DEBUG) tracef("loading MIME properties from: %s", fileName);
165         try {
166             File f = File(fileName, "r");
167             scope(exit) f.close();
168             string line;
169             int count = 0;
170             while((line = f.readln()) !is null) {
171                 addEncoding(line);
172             }
173 
174             // if (_inferredEncodings.size() == 0) {
175             //     warningf("Empty encodings in resource: %s", fileName);
176             // } else if (_inferredEncodings.size() + _assumedEncodings.size() < count) {
177             //     warningf("Null or duplicate encodings in resource: %s", fileName);
178             // }            
179         } catch(Exception ex) {
180             warningf(ex.toString());
181         }
182     }
183 
    /**
     * Constructor.
     *
     * Performs no initialization; the per-instance mime map is created on
     * demand by getMimeMap().
     */
    this() {
    }
189 
190     Map!(string, string) getMimeMap() {
191         if(_mimeMap is null)
192             _mimeMap = new HashMap!(string, string)();
193         return _mimeMap;
194     }
195 
196     private Map!(string, string) _mimeMap; 
197 
198     /**
199      * @param mimeMap A Map of file extension to mime-type.
200      */
201     void setMimeMap(Map!(string, string) mimeMap) {
202         _mimeMap.clear();
203         if (mimeMap !is null) {
204             foreach (string k, string v ; mimeMap) {
205                 _mimeMap.put(std.uni.toLower(k), normalizeMimeType(v));
206             }
207         }
208     }
209 
210     /**
211      * Get the MIME type by filename extension.
212      * Lookup only the static default mime map.
213      *
214      * @param filename A file name
215      * @return MIME type matching the longest dot extension of the
216      * file name.
217      */
218     static string getDefaultMimeByExtension(string filename) {
219         string type = null;
220 
221         if (filename != null) {
222             ptrdiff_t i = -1;
223             while (type == null) {
224                 i = filename.indexOf(".", i + 1);
225 
226                 if (i < 0 || i >= filename.length)
227                     break;
228 
229                 string ext = std.uni.toLower(filename[i + 1 .. $]);
230                 if (type == null)
231                     type = __dftMimeMap().get(ext);
232             }
233         }
234 
235         if (type == null) {
236             if (type == null)
237                 type = __dftMimeMap().get("*");
238         }
239 
240         return type;
241     }
242 
243     /**
244      * Get the MIME type by filename extension.
245      * Lookup the content and static default mime maps.
246      *
247      * @param filename A file name
248      * @return MIME type matching the longest dot extension of the
249      * file name.
250      */
251     string getMimeByExtension(string filename) {
252         string type = null;
253 
254         if (filename != null) {
255             ptrdiff_t i = -1;
256             while (type == null) {
257                 i = filename.indexOf(".", i + 1);
258 
259                 if (i < 0 || i >= filename.length)
260                     break;
261 
262                 string ext = std.uni.toLower(filename[i + 1 .. $]);
263                 if (_mimeMap !is null && _mimeMap.containsKey(ext))
264                     type = _mimeMap.get(ext);
265                 if (type == null && __dftMimeMap.containsKey(ext))
266                     type = __dftMimeMap.get(ext);
267             }
268         }
269 
270         if (type == null) {
271             if (_mimeMap !is null && _mimeMap.containsKey("*"))
272                 type = _mimeMap.get("*");
273             if (type == null && __dftMimeMap.containsKey("*"))
274                 type = __dftMimeMap.get("*");
275         }
276 
277         return type;
278     }
279 
280     /**
281      * Set a mime mapping
282      *
283      * @param extension the extension
284      * @param type      the mime type
285      */
286     void addMimeMapping(string extension, string type) {
287         _mimeMap.put(std.uni.toLower(extension), normalizeMimeType(type));
288     }
289 
290     static Set!string getKnownMimeTypes() {
291         auto hs = new HashSet!(string)();
292         foreach(v ; __dftMimeMap.byValue())
293             hs.add(v);
294         return hs;
295     }
296 
297     private static string normalizeMimeType(string type) {
298         MimeType t = CACHE.get(type, null);
299         if (t !is null)
300             return t.asString();
301 
302         return std.uni.toLower(type);
303     }
304 
    /**
     * Extract the charset parameter from a Content-Type style value.
     *
     * The value is scanned with a small state machine:
     * state 0 looks for a ';', states 1-8 match the lower-case letters of
     * "charset" (spaces are tolerated right after the ';' and before the '='),
     * state 9 skips spaces after the '=' and state 10 collects the charset
     * value, which may be double-quoted.
     *
     * @param value the header value; null or empty yields null
     * @return the result of StringUtils.normalizeCharset for the charset span,
     *         or null when no charset parameter is found
     */
    static string getCharsetFromContentType(string value) {
        if (value == null)
            return null;
        int end = cast(int)value.length;
        int state = 0;
        int start = 0;
        bool quote = false;
        int i = 0;
        for (; i < end; i++) {
            char b = value[i];

            // Inside a quoted string that is not the charset value itself:
            // skip everything up to the closing quote.
            if (quote && state != 10) {
                if ('"' == b)
                    quote = false;
                continue;
            }

            // A ';' before the '=' has been consumed starts a new parameter.
            if (';' == b && state <= 8) {
                state = 1;
                continue;
            }

            switch (state) {
                case 0:
                    if ('"' == b) {
                        quote = true;
                        break;
                    }
                    break;

                // States 1..7: match "charset" one letter at a time; any
                // mismatch returns to state 0.
                case 1:
                    if ('c' == b) state = 2;
                    else if (' ' != b) state = 0;
                    break;
                case 2:
                    if ('h' == b) state = 3;
                    else state = 0;
                    break;
                case 3:
                    if ('a' == b) state = 4;
                    else state = 0;
                    break;
                case 4:
                    if ('r' == b) state = 5;
                    else state = 0;
                    break;
                case 5:
                    if ('s' == b) state = 6;
                    else state = 0;
                    break;
                case 6:
                    if ('e' == b) state = 7;
                    else state = 0;
                    break;
                case 7:
                    if ('t' == b) state = 8;
                    else state = 0;
                    break;

                // "charset" seen: expect '=' (spaces allowed before it).
                case 8:
                    if ('=' == b) state = 9;
                    else if (' ' != b) state = 0;
                    break;

                // After '=': skip spaces, then remember where the value starts.
                case 9:
                    if (' ' == b)
                        break;
                    if ('"' == b) {
                        quote = true;
                        start = i + 1;
                        state = 10;
                        break;
                    }
                    start = i;
                    state = 10;
                    break;

                // Collecting the value: an unquoted value ends at ';' or ' ',
                // a quoted value ends at the closing quote.
                case 10:
                    if (!quote && (';' == b || ' ' == b) ||
                            (quote && '"' == b))
                        return StringUtils.normalizeCharset(value, start, i - start);
                    break;

                default: break;
            }
        }

        // The value ran to the end of the input.
        if (state == 10)
            return StringUtils.normalizeCharset(value, start, i - start);

        return null;
    }
397 
398     static void addEncoding(string encoding) {
399         string[] parts = split(encoding, "=");
400         if(parts.length < 2) {
401             return;
402         }
403 
404         // count++;
405         string t = parts[0].strip();
406         string charset = parts[1].strip();
407         version(HUNT_DEBUG) trace(t, " = ", charset);
408         if(charset.startsWith("-"))
409             _assumedEncodings.put(t, charset[1..$]);
410         else
411             _inferredEncodings.put(t, charset);        
412     }
413 
    /**
     * Access a mutable map of mime type to the charset inferred from that content type.
     * An inferred encoding is used when encoding/decoding a stream and is
     * explicitly set in any metadata (e.g. Content-Type).
     *
     * The encoding maps are initialized on first access.
     *
     * @return Map of mime type to charset
     */
    static Map!(string, string) getInferredEncodings() {
        initializeEncodingsMap();
        return _inferredEncodings;
    }
425 
    /**
     * Access a mutable map of mime type to the charset assumed for that content type.
     * An assumed encoding is used when encoding/decoding a stream, but is not
     * explicitly set in any metadata (e.g. Content-Type).
     *
     * The encoding maps are initialized on first access.
     *
     * @return Map of mime type to charset
     */
    static Map!(string, string) getAssumedEncodings() {
        initializeEncodingsMap();
        return _assumedEncodings;
    }
437 
438     static string getCharsetInferredFromContentType(string contentType) {
439         return getInferredEncodings().get(contentType);
440     }
441 
442     static string getCharsetAssumedFromContentType(string contentType) {
443         return getAssumedEncodings().get(contentType);
444     }
445 
    /**
     * Produce a copy of a Content-Type style value with its charset parameter
     * left out.
     *
     * The scan uses the same state numbering as the charset matcher: state 0
     * looks for ';', states 1-8 match the lower-case letters of "charset",
     * state 9 waits for the first character of the charset value, state 10
     * skips the charset value and state 11 copies what follows the next ';'.
     * The output builder is only created once a charset value has been found.
     *
     * @param value the header value
     * @return value itself when no charset parameter was found, otherwise the
     *         rebuilt string
     */
    static string getContentTypeWithoutCharset(string value) {
        int end = cast(int)value.length;
        int state = 0;
        int start = 0;
        bool quote = false;
        int i = 0;
        StringBuilder builder = null;
        for (; i < end; i++) {
            char b = value[i];

            // Every double quote toggles the quoted-section flag.
            if ('"' == b) {
                quote = !quote;

                switch (state) {
                    case 11:
                        // Quotes after the charset parameter are kept.
                        builder.append(b);
                        break;
                    case 10:
                        // Quote belonging to the charset value: dropped.
                        break;
                    case 9:
                        // Quoted charset value begins: start the output with the
                        // text seen before the charset parameter.
                        // NOTE(review): presumably append(value, 0, start + 1)
                        // copies the leading start + 1 characters — confirm
                        // against StringBuilder.
                        builder = new StringBuilder();
                        builder.append(value, 0, start + 1);
                        state = 10;
                        break;
                    default:
                        start = i;
                        state = 0;
                }
                continue;
            }

            // Quoted content is copied verbatim once output has started, except
            // while inside the charset value.
            if (quote) {
                if (builder !is null && state != 10)
                    builder.append(b);
                continue;
            }

            switch (state) {
                case 0:
                    // start tracks the most recent non-space character seen
                    // before a ';'.
                    if (';' == b)
                        state = 1;
                    else if (' ' != b)
                        start = i;
                    break;

                // States 1..7: match "charset" one letter at a time; any
                // mismatch returns to state 0.
                case 1:
                    if ('c' == b) state = 2;
                    else if (' ' != b) state = 0;
                    break;
                case 2:
                    if ('h' == b) state = 3;
                    else state = 0;
                    break;
                case 3:
                    if ('a' == b) state = 4;
                    else state = 0;
                    break;
                case 4:
                    if ('r' == b) state = 5;
                    else state = 0;
                    break;
                case 5:
                    if ('s' == b) state = 6;
                    else state = 0;
                    break;
                case 6:
                    if ('e' == b) state = 7;
                    else state = 0;
                    break;
                case 7:
                    if ('t' == b) state = 8;
                    else state = 0;
                    break;
                case 8:
                    if ('=' == b) state = 9;
                    else if (' ' != b) state = 0;
                    break;

                case 9:
                    // Unquoted charset value begins: start the output with the
                    // text seen before the charset parameter.
                    if (' ' == b)
                        break;
                    builder = new StringBuilder();
                    builder.append(value, 0, start + 1);
                    state = 10;
                    break;

                case 10:
                    // Skip the charset value; the next ';' resumes copying.
                    if (';' == b) {
                        builder.append(b);
                        state = 11;
                    }
                    break;

                case 11:
                    // Copy the remaining parameters, dropping spaces.
                    if (' ' != b)
                        builder.append(b);
                    break;
                
                default: break;
            }
        }
        // No charset parameter was found: hand back the input unchanged.
        if (builder is null)
            return value;
        return builder.toString();

    }
552 
553     static string getContentTypeMIMEType(string contentType) {
554         if (contentType.empty) 
555             return null;
556 
557         // parsing content-type
558         string[] strings = StringUtils.split(contentType, ";");
559         return strings[0];
560     }
561 
562     static List!string getAcceptMIMETypes(string accept) {
563         if(accept.empty) 
564             new EmptyList!string(); // Collections.emptyList();
565 
566         List!string list = new ArrayList!string();
567         // parsing accept
568         string[] strings = StringUtils.split(accept, ",");
569         foreach (string str ; strings) {
570             string[] s = StringUtils.split(str, ";");
571             list.add(s[0].strip());
572         }
573         return list;
574     }
575 
576     static AcceptMimeType[] parseAcceptMIMETypes(string accept) {
577 
578         if(accept.empty) 
579             return [];
580 
581         string[] arr = StringUtils.split(accept, ",");
582         return apply(arr);
583     }
584 
585     private static AcceptMimeType[] apply(string[] stream) {
586 
587         Array!AcceptMimeType arr;
588 
589         foreach(string s; stream) {
590             string type = strip(s);
591             if(type.empty) continue;
592             string[] mimeTypeAndQuality = StringUtils.split(type, ';');
593             AcceptMimeType acceptMIMEType = new AcceptMimeType();
594             
595             // parse the MIME type
596             string[] mimeType = StringUtils.split(mimeTypeAndQuality[0].strip(), '/');
597             string parentType = mimeType[0].strip();
598             string childType = mimeType[1].strip();
599             acceptMIMEType.setParentType(parentType);
600             acceptMIMEType.setChildType(childType);
601             if (parentType == "*") {
602                 if (childType == "*") {
603                     acceptMIMEType.setMatchType(AcceptMimeMatchType.ALL);
604                 } else {
605                     acceptMIMEType.setMatchType(AcceptMimeMatchType.CHILD);
606                 }
607             } else {
608                 if (childType == "*") {
609                     acceptMIMEType.setMatchType(AcceptMimeMatchType.PARENT);
610                 } else {
611                     acceptMIMEType.setMatchType(AcceptMimeMatchType.EXACT);
612                 }
613             }
614 
615             // parse the quality
616             if (mimeTypeAndQuality.length > 1) {
617                 string q = mimeTypeAndQuality[1];
618                 string[] qualityKV = StringUtils.split(q, '=');
619                 acceptMIMEType.setQuality(to!float(qualityKV[1].strip()));
620             }
621             arr.insertBack(acceptMIMEType);
622         }
623 
624         for(size_t i=0; i<arr.length-1; i++) {
625             for(size_t j=i+1; j<arr.length; j++) {
626                 AcceptMimeType a = arr[i];
627                 AcceptMimeType b = arr[j];
628                 if(b.getQuality() > a.getQuality()) {   // The greater quality is first.
629                     arr[i] = b; arr[j] = a;
630                 }
631             }
632         }
633 
634         return arr.array();
635     }
636 }