1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.collection.ArrayTernaryTrie;
13 
14 import hunt.collection.AbstractMap;
15 import hunt.collection.AbstractTrie;
16 import hunt.io.ByteBuffer;
17 import hunt.collection.HashSet;
18 import hunt.collection.Set;
19 import hunt.collection.Trie;
20 
21 import hunt.Exceptions;
22 import hunt.text;
23 
24 import std.ascii;
25 import std.conv;
26 
27 /**
28  * <p>A Ternary Trie string lookup data structure.</p>
29  * <p>
30  * This Trie is of a fixed size and cannot grow (which can be a good thing with regards to DOS when used as a cache).
31  * </p>
32  * <p>
33  * The Trie is stored in 3 arrays:
34  * </p>
35  * <dl>
36  * <dt>char[] _tree</dt><dd>This is semantically 2 dimensional array flattened into a 1 dimensional char array. The second dimension
37  * is that every 4 sequential elements represents a row of: character; hi index; eq index; low index, used to build a
38  * ternary trie of key strings.</dd>
39  * <dt>string[] _key</dt><dd>An array of key values where each element matches a row in the _tree array. A non zero key element
40  * indicates that the _tree row is a complete key rather than an intermediate character of a longer key.</dd>
41  * <dt>V[] _value</dt><dd>An array of values corresponding to the _key array</dd>
42  * </dl>
43  * <p>The lookup of a value will iterate through the _tree array matching characters. If the equal tree branch is followed,
44  * then the _key array is looked up to see if this is a complete match.  If a match is found then the _value array is looked up
45  * to return the matching value.
46  * </p>
47  * <p>
48  * This Trie may be instantiated either as case sensitive or insensitive.
49  * </p>
50  * <p>This Trie is not Threadsafe and contains no mutual exclusion
51  * or deliberate memory barriers.  It is intended for an ArrayTrie to be
52  * built by a single thread and then used concurrently by multiple threads
53  * and not mutated during that access.  If concurrent mutations of the
54  * Trie is required external locks need to be applied.
55  * </p>
56  *
57  * @param (V) the Entry type
58  */
59 class ArrayTernaryTrie(V) : AbstractTrie!(V) {
60     private enum int LO = 1;
61     private enum int EQ = 2;
62     private enum int HI = 3;
63 
64     /**
65      * The Size of a Trie row is the char, and the low, equal and high
66      * child pointers
67      */
68     private enum int ROW_SIZE = 4;
69 
70     /**
71      * The Trie rows in a single array which allows a lookup of row,character
72      * to the next row in the Trie.  This is actually a 2 dimensional
73      * array that has been flattened to achieve locality of reference.
74      */
75     private byte[] _tree;
76 
77     /**
78      * The key (if any) for a Trie row.
79      * A row may be a leaf, a node or both in the Trie tree.
80      */
81     private string[] _key;
82 
83     /**
84      * The value (if any) for a Trie row.
85      * A row may be a leaf, a node or both in the Trie tree.
86      */
87     private V[] _value;
88 
89     /**
90      * The number of rows allocated
91      */
92     private byte _rows;
93 
94     /* ------------------------------------------------------------ */
95 
96     /**
97      * Create a case insensitive Trie of default capacity.
98      */
99     this() {
100         this(128);
101     }
102 
103     /* ------------------------------------------------------------ */
104 
105     /**
106      * Create a Trie of default capacity
107      *
108      * @param insensitive true if the Trie is insensitive to the case of the key.
109      */
110     this(bool insensitive) {
111         this(insensitive, 128);
112     }
113 
114     /* ------------------------------------------------------------ */
115 
116     /**
117      * Create a case insensitive Trie
118      *
119      * @param capacity The capacity of the Trie, which is in the worst case
120      *                 is the total number of characters of all keys stored in the Trie.
121      *                 The capacity needed is dependent of the shared prefixes of the keys.
122      *                 For example, a capacity of 6 nodes is required to store keys "foo"
123      *                 and "bar", but a capacity of only 4 is required to
124      *                 store "bar" and "bat".
125      */
126     this(int capacity) {
127         this(true, capacity);
128     }
129 
130     /* ------------------------------------------------------------ */
131 
132     /**
133      * Create a Trie
134      *
135      * @param insensitive true if the Trie is insensitive to the case of the key.
136      * @param capacity    The capacity of the Trie, which is in the worst case
137      *                    is the total number of characters of all keys stored in the Trie.
138      *                    The capacity needed is dependent of the shared prefixes of the keys.
139      *                    For example, a capacity of 6 nodes is required to store keys "foo"
140      *                    and "bar", but a capacity of only 4 is required to
141      *                    store "bar" and "bat".
142      */
143     this(bool insensitive, int capacity) {
144         super(insensitive);
145         _value = new V[capacity];
146         _tree = new byte[capacity * ROW_SIZE];
147         _key = new string[capacity];
148     }
149 
150     /* ------------------------------------------------------------ */
151 
152     /**
153      * Copy Trie and change capacity by a factor
154      *
155      * @param trie   the trie to copy from
156      * @param factor the factor to grow the capacity by
157      */
158     this(ArrayTernaryTrie!(V) trie, double factor) {
159         super(trie.isCaseInsensitive());
160         size_t capacity = cast(size_t) (trie._value.length * factor);
161         _rows = trie._rows;
162         _value = new V[capacity];
163         _tree = new byte[capacity * ROW_SIZE];
164         _key = new string[capacity];
165 
166         import std.algorithm;
167         size_t actualLength = min(_value.length, trie._value.length);
168         _value[0 .. actualLength] = trie._value[0 .. actualLength];
169 
170         actualLength = min(_tree.length, trie._tree.length);
171         _tree[0 .. actualLength] = trie._tree[0 .. actualLength];
172 
173         actualLength = min(_key.length, trie._key.length);
174         _key[0 .. actualLength] = trie._key[0 .. actualLength];
175     }
176 
177     /* ------------------------------------------------------------ */
178     override
179     void clear() {
180         _rows = 0;
181         _value[] = V.init;
182         _tree[] = 0;
183         _key[] = null;
184     }
185 
186     /* ------------------------------------------------------------ */
187     override
188     bool put(string s, V v) {
189         int t = 0;
190         size_t limit = s.length;
191         int last = 0;
192         for (size_t k = 0; k < limit; k++) {
193             byte c = s[k];
194             if (isCaseInsensitive() && c < 128)
195                 c = toLower(c);
196 
197             while (true) {
198                 int row = ROW_SIZE * t;
199 
200                 // Do we need to create the new row?
201                 if (t == _rows) {
202                     _rows++;
203                     if (_rows >= _key.length) {
204                         _rows--;
205                         return false;
206                     }
207                     _tree[row] = c;
208                 }
209 
210                 char n = _tree[row];
211                 int diff = n - c;
212                 if (diff == 0)
213                     t = _tree[last = (row + EQ)];
214                 else if (diff < 0)
215                     t = _tree[last = (row + LO)];
216                 else
217                     t = _tree[last = (row + HI)];
218 
219                 // do we need a new row?
220                 if (t == 0) {
221                     t = _rows;
222                     _tree[last] = cast(byte) t;
223                 }
224 
225                 if (diff == 0)
226                     break;
227             }
228         }
229 
230         // Do we need to create the new row?
231         if (t == _rows) {
232             _rows++;
233             if (_rows >= _key.length) {
234                 _rows--;
235                 return false;
236             }
237         }
238 
239         // Put the key and value
240         static if(is(V == class)) {
241             _key[t] = v is null ? null : s;
242         } else {
243             _key[t] = v == V.init ? null : s;
244         }
245         
246         _value[t] = v;
247 
248         return true;
249     }
250 
251 
252     /* ------------------------------------------------------------ */
253     override
254     V get(string s, int offset, int len) {
255         int t = 0;
256         for (int i = 0; i < len; ) {
257             byte c = s.charAt(offset + i++);
258             if (isCaseInsensitive() && c < 128)
259                 c = toLower(c);
260 
261             while (true) {
262                 int row = ROW_SIZE * t;
263                 char n = _tree[row];
264                 int diff = n - c;
265 
266                 if (diff == 0) {
267                     t = _tree[row + EQ];
268                     if (t == 0)
269                         return V.init;
270                     break;
271                 }
272 
273                 t = _tree[row + hilo(diff)];
274                 if (t == 0)
275                     return V.init;
276             }
277         }
278 
279         return _value[t];
280     }
281 
282 
283     override
284     V get(ByteBuffer b, int offset, int len) {
285         int t = 0;
286         offset += b.position();
287 
288         for (int i = 0; i < len; ) {
289             byte c = cast(byte) (b.get(offset + i++) & 0x7f);
290             if (isCaseInsensitive())
291                 c = cast(byte) toLower(c);
292 
293             while (true) {
294                 int row = ROW_SIZE * t;
295                 char n = _tree[row];
296                 int diff = n - c;
297 
298                 if (diff == 0) {
299                     t = _tree[row + EQ];
300                     if (t == 0)
301                         return V.init;
302                     break;
303                 }
304 
305                 t = _tree[row + hilo(diff)];
306                 if (t == 0)
307                     return V.init;
308             }
309         }
310 
311         return _value[t];
312     }
313 
314     /* ------------------------------------------------------------ */
315     override
316     V getBest(string s) {
317         return getBest(0, s, 0, cast(int)s.length);
318     }
319 
320     /* ------------------------------------------------------------ */
321     override
322     V getBest(string s, int offset, int length) {
323         return getBest(0, s, offset, length);
324     }
325 
326     /* ------------------------------------------------------------ */
327     private V getBest(int t, string s, int offset, int len) {
328         int node = t;
329         int end = offset + len;
330         loop:
331         while (offset < end) {
332             byte c = s.charAt(offset++);
333             len--;
334             if (isCaseInsensitive() && c < 128)
335                 c = toLower(c);
336 
337             while (true) {
338                 int row = ROW_SIZE * t;
339                 char n = _tree[row];
340                 int diff = n - c;
341 
342                 if (diff == 0) {
343                     t = _tree[row + EQ];
344                     if (t == 0)
345                         break loop;
346 
347                     // if this node is a match, recurse to remember
348                     if (_key[t] !is null) {
349                         node = t;
350                         V best = getBest(t, s, offset, len);
351                         static if(is(V == class)) {
352                             if (best !is null)
353                                 return best;
354                         } else {
355                             if (best != V.init)
356                                 return best;
357                         }
358                     }
359                     break;
360                 }
361 
362                 t = _tree[row + hilo(diff)];
363                 if (t == 0)
364                     break loop;
365             }
366         }
367         return _value[node];
368     }
369 
370 
371     /* ------------------------------------------------------------ */
372     override
373     V getBest(ByteBuffer b, int offset, int len) {
374         if (b.hasArray())
375             return getBest(0, b.array(), b.arrayOffset() + b.position() + offset, len);
376         return getBest(0, b, offset, len);
377     }
378 
379     /* ------------------------------------------------------------ */
380     private V getBest(int t, byte[] b, int offset, int len) {
381         int node = t;
382         int end = offset + len;
383         loop:
384         while (offset < end) {
385             byte c = cast(byte) (b[offset++] & 0x7f);
386             len--;
387             if (isCaseInsensitive())
388                 c = cast(byte) toLower(c);
389 
390             while (true) {
391                 int row = ROW_SIZE * t;
392                 char n = _tree[row];
393                 int diff = n - c;
394 
395                 if (diff == 0) {
396                     t = _tree[row + EQ];
397                     if (t == 0)
398                         break loop;
399 
400                     // if this node is a match, recurse to remember
401                     if (_key[t] !is null) {
402                         node = t;
403                         V best = getBest(t, b, offset, len);
404                         static if(is(V == class)) {
405                         if (best !is null)
406                             return best;
407                         } else {
408                             if (best != V.init)
409                                 return best;
410                         }
411                     }
412                     break;
413                 }
414 
415                 t = _tree[row + hilo(diff)];
416                 if (t == 0)
417                     break loop;
418             }
419         }
420         return _value[node];
421     }
422 
423     /* ------------------------------------------------------------ */
424     private V getBest(int t, ByteBuffer b, int offset, int len) {
425         int node = t;
426         int o = offset + b.position();
427 
428         loop:
429         for (int i = 0; i < len; i++) {
430             byte c = cast(byte) (b.get(o + i) & 0x7f);
431             if (isCaseInsensitive())
432                 c = cast(byte) toLower(c);
433 
434             while (true) {
435                 int row = ROW_SIZE * t;
436                 char n = _tree[row];
437                 int diff = n - c;
438 
439                 if (diff == 0) {
440                     t = _tree[row + EQ];
441                     if (t == 0)
442                         break loop;
443 
444                     // if this node is a match, recurse to remember
445                     if (_key[t] !is null) {
446                         node = t;
447                         V best = getBest(t, b, offset + i + 1, len - i - 1);
448                         
449                         static if(is(V == class)) {
450                             if (best !is null)
451                                 return best;
452                         } else {
453                             if (best != V.init)
454                                 return best;
455                         }
456                     }
457                     break;
458                 }
459 
460                 t = _tree[row + hilo(diff)];
461                 if (t == 0)
462                     break loop;
463             }
464         }
465         return _value[node];
466     }
467 
468     override
469     string toString() {
470         StringBuilder buf = new StringBuilder();
471         for (int r = 0; r <= _rows; r++) {
472             static if(is(V == class)) {                
473                 if (_key[r] !is null && _value[r] !is null) {
474                     buf.append(',');
475                     buf.append(_key[r]);
476                     buf.append('=');
477                     buf.append(_value[r].to!string());
478                 }
479             } else { 
480                 if (_key[r] !is null && _value[r] != V.init) {
481                     buf.append(',');
482                     buf.append(_key[r]);
483                     buf.append('=');
484                     buf.append(_value[r].to!string());
485                 }
486             }
487         }
488         if (buf.length() == 0)
489             return "{}";
490 
491         buf.setCharAt(0, '{');
492         buf.append('}');
493         return buf.toString();
494     }
495 
496 
497     override
498     Set!(string) keySet() {
499         Set!(string) keys = new HashSet!string();
500 
501         for (int r = 0; r <= _rows; r++) {
502             static if(is(V == class)) {
503                 if (_key[r] !is null && _value[r] !is null)
504                     keys.add(_key[r]);
505             } else {
506                 if (_key[r] !is null && _value[r] != V.init)
507                     keys.add(_key[r]);
508             }
509         }
510         return keys;
511     }
512 
513     int size() {
514         int s = 0;
515         for (int r = 0; r <= _rows; r++) {
516             
517             static if(is(V == class)) {
518                 if (_key[r] !is null && _value[r] !is null)
519                     s++;
520             } else {
521                 if (_key[r] !is null && _value[r] != V.init)
522                     s++;
523             }
524 
525         }
526         return s;
527     }
528 
529     bool isEmpty() {
530         for (int r = 0; r <= _rows; r++) {
531             static if(is(V == class)) {
532                 if (_key[r] !is null && _value[r] !is null)
533                     return false;
534             } else {
535                 if (_key[r] !is null && _value[r] != V.init)
536                     return false;
537             }
538         }
539         return true;
540     }
541 
542 
543     // Set<Map.Entry<string, V>> entrySet() {
544     //     Set<Map.Entry<string, V>> entries = new HashSet<>();
545     //     for (int r = 0; r <= _rows; r++) {
546     //         if (_key[r] !is null && _value[r] !is null)
547     //             entries.add(new AbstractMap.SimpleEntry<>(_key[r], _value[r]));
548     //     }
549     //     return entries;
550     // }
551 
552     override
553     bool isFull() {
554         return _rows + 1 == _key.length;
555     }
556 
557     static int hilo(int diff) {
558         // branchless equivalent to return ((diff<0)?LO:HI);
559         // return 3+2*((diff&Integer.MIN_VALUE)>>Integer.SIZE-1);
560         return 1 + (diff | int.max) / (int.max / 2);
561     }
562 
563     void dump() {
564         import std.stdio;
565         for (int r = 0; r < _rows; r++) {
566             byte c = _tree[r * ROW_SIZE + 0];
567             writefln("%4d [%s,%d,%d,%d] '%s':%s",
568                     r,
569                     (c < ' ' || c > 127) ? to!string(cast(int) c) : "'" ~ c ~ "'",
570                     cast(int) _tree[r * ROW_SIZE + LO],
571                     cast(int) _tree[r * ROW_SIZE + EQ],
572                     cast(int) _tree[r * ROW_SIZE + HI],
573                     _key[r],
574                     _value[r]);
575         }
576 
577     }
578 }
579 
580 
581 class Growing(V) : Trie!(V) {
582     private int _growby;
583     private ArrayTernaryTrie!(V) _trie;
584 
585     this() {
586         this(1024, 1024);
587     }
588 
589     this(int capacity, int growby) {
590         _growby = growby;
591         _trie = new ArrayTernaryTrie!V(capacity);
592     }
593 
594     this(bool insensitive, int capacity, int growby) {
595         _growby = growby;
596         _trie = new ArrayTernaryTrie!V(insensitive, capacity);
597     }
598 
599     bool put(V v) {
600         return put(v.toString(), v);
601     }
602 
603     int hashCode() {
604         return _trie.hashCode();
605     }
606 
607     V remove(string s) {
608         return _trie.remove(s);
609     }
610 
611     V get(string s) {
612         return _trie.get(s);
613     }
614 
615     V get(ByteBuffer b) {
616         return _trie.get(b);
617     }
618 
619     V getBest(byte[] b, int offset, int len) {
620         return _trie.getBest(b, offset, len);
621     }
622 
623     bool isCaseInsensitive() {
624         return _trie.isCaseInsensitive();
625     }
626 
627     bool equals(Object obj) {
628         return _trie.equals(obj);
629     }
630 
631     void clear() {
632         _trie.clear();
633     }
634 
635     bool put(string s, V v) {
636         bool added = _trie.put(s, v);
637         while (!added && _growby > 0) {
638             ArrayTernaryTrie!(V) bigger = new ArrayTernaryTrie!V(_trie._key.length + _growby);
639             foreach (string key, V value ; _trie)
640                 bigger.put(key, value);
641             _trie = bigger;
642             added = _trie.put(s, v);
643         }
644 
645         return added;
646     }
647 
648     V get(string s, int offset, int len) {
649         return _trie.get(s, offset, len);
650     }
651 
652     V get(ByteBuffer b, int offset, int len) {
653         return _trie.get(b, offset, len);
654     }
655 
656     V getBest(string s) {
657         return _trie.getBest(s);
658     }
659 
660     V getBest(string s, int offset, int length) {
661         return _trie.getBest(s, offset, length);
662     }
663 
664     V getBest(ByteBuffer b, int offset, int len) {
665         return _trie.getBest(b, offset, len);
666     }
667 
668     string toString() {
669         return _trie.toString();
670     }
671 
672     // Set!(string) keySet() {
673     //     return _trie.keySet();
674     // }
675 
676     bool isFull() {
677         return false;
678     }
679 
680     void dump() {
681         _trie.dump();
682     }
683 
684     bool isEmpty() {
685         return _trie.isEmpty();
686     }
687 
688     int size() {
689         return _trie.size();
690     }
691 
692 }