1 /* 2 * Hunt - A refined core library for D programming language. 3 * 4 * Copyright (C) 2018-2019 HuntLabs 5 * 6 * Website: https://www.huntlabs.net/ 7 * 8 * Licensed under the Apache-2.0 License. 9 * 10 */ 11 12 module hunt.collection.ArrayTernaryTrie; 13 14 import hunt.collection.AbstractMap; 15 import hunt.collection.AbstractTrie; 16 import hunt.io.ByteBuffer; 17 import hunt.collection.HashSet; 18 import hunt.collection.Set; 19 import hunt.collection.Trie; 20 21 import hunt.Exceptions; 22 import hunt.text; 23 24 import std.ascii; 25 import std.conv; 26 27 /** 28 * <p>A Ternary Trie string lookup data structure.</p> 29 * <p> 30 * This Trie is of a fixed size and cannot grow (which can be a good thing with regards to DOS when used as a cache). 31 * </p> 32 * <p> 33 * The Trie is stored in 3 arrays: 34 * </p> 35 * <dl> 36 * <dt>char[] _tree</dt><dd>This is semantically 2 dimensional array flattened into a 1 dimensional char array. The second dimension 37 * is that every 4 sequential elements represents a row of: character; hi index; eq index; low index, used to build a 38 * ternary trie of key strings.</dd> 39 * <dt>string[] _key</dt><dd>An array of key values where each element matches a row in the _tree array. A non zero key element 40 * indicates that the _tree row is a complete key rather than an intermediate character of a longer key.</dd> 41 * <dt>V[] _value</dt><dd>An array of values corresponding to the _key array</dd> 42 * </dl> 43 * <p>The lookup of a value will iterate through the _tree array matching characters. If the equal tree branch is followed, 44 * then the _key array is looked up to see if this is a complete match. If a match is found then the _value array is looked up 45 * to return the matching value. 46 * </p> 47 * <p> 48 * This Trie may be instantiated either as case sensitive or insensitive. 49 * </p> 50 * <p>This Trie is not Threadsafe and contains no mutual exclusion 51 * or deliberate memory barriers. It is intended for an ArrayTrie to be 52 * built by a single thread and then used concurrently by multiple threads 53 * and not mutated during that access. If concurrent mutations of the 54 * Trie is required external locks need to be applied. 55 * </p> 56 * 57 * @param (V) the Entry type 58 */ 59 class ArrayTernaryTrie(V) : AbstractTrie!(V) { 60 private enum int LO = 1; 61 private enum int EQ = 2; 62 private enum int HI = 3; 63 64 /** 65 * The Size of a Trie row is the char, and the low, equal and high 66 * child pointers 67 */ 68 private enum int ROW_SIZE = 4; 69 70 /** 71 * The Trie rows in a single array which allows a lookup of row,character 72 * to the next row in the Trie. This is actually a 2 dimensional 73 * array that has been flattened to achieve locality of reference. 74 */ 75 private byte[] _tree; 76 77 /** 78 * The key (if any) for a Trie row. 79 * A row may be a leaf, a node or both in the Trie tree. 80 */ 81 private string[] _key; 82 83 /** 84 * The value (if any) for a Trie row. 85 * A row may be a leaf, a node or both in the Trie tree. 86 */ 87 private V[] _value; 88 89 /** 90 * The number of rows allocated 91 */ 92 private byte _rows; 93 94 /* ------------------------------------------------------------ */ 95 96 /** 97 * Create a case insensitive Trie of default capacity. 98 */ 99 this() { 100 this(128); 101 } 102 103 /* ------------------------------------------------------------ */ 104 105 /** 106 * Create a Trie of default capacity 107 * 108 * @param insensitive true if the Trie is insensitive to the case of the key. 109 */ 110 this(bool insensitive) { 111 this(insensitive, 128); 112 } 113 114 /* ------------------------------------------------------------ */ 115 116 /** 117 * Create a case insensitive Trie 118 * 119 * @param capacity The capacity of the Trie, which is in the worst case 120 * is the total number of characters of all keys stored in the Trie. 121 * The capacity needed is dependent of the shared prefixes of the keys. 122 * For example, a capacity of 6 nodes is required to store keys "foo" 123 * and "bar", but a capacity of only 4 is required to 124 * store "bar" and "bat". 125 */ 126 this(int capacity) { 127 this(true, capacity); 128 } 129 130 /* ------------------------------------------------------------ */ 131 132 /** 133 * Create a Trie 134 * 135 * @param insensitive true if the Trie is insensitive to the case of the key. 136 * @param capacity The capacity of the Trie, which is in the worst case 137 * is the total number of characters of all keys stored in the Trie. 138 * The capacity needed is dependent of the shared prefixes of the keys. 139 * For example, a capacity of 6 nodes is required to store keys "foo" 140 * and "bar", but a capacity of only 4 is required to 141 * store "bar" and "bat". 142 */ 143 this(bool insensitive, int capacity) { 144 super(insensitive); 145 _value = new V[capacity]; 146 _tree = new byte[capacity * ROW_SIZE]; 147 _key = new string[capacity]; 148 } 149 150 /* ------------------------------------------------------------ */ 151 152 /** 153 * Copy Trie and change capacity by a factor 154 * 155 * @param trie the trie to copy from 156 * @param factor the factor to grow the capacity by 157 */ 158 this(ArrayTernaryTrie!(V) trie, double factor) { 159 super(trie.isCaseInsensitive()); 160 size_t capacity = cast(size_t) (trie._value.length * factor); 161 _rows = trie._rows; 162 _value = new V[capacity]; 163 _tree = new byte[capacity * ROW_SIZE]; 164 _key = new string[capacity]; 165 166 import std.algorithm; 167 size_t actualLength = min(_value.length, trie._value.length); 168 _value[0 .. actualLength] = trie._value[0 .. actualLength]; 169 170 actualLength = min(_tree.length, trie._tree.length); 171 _tree[0 .. actualLength] = trie._tree[0 .. actualLength]; 172 173 actualLength = min(_key.length, trie._key.length); 174 _key[0 .. actualLength] = trie._key[0 .. actualLength]; 175 } 176 177 /* ------------------------------------------------------------ */ 178 override 179 void clear() { 180 _rows = 0; 181 _value[] = V.init; 182 _tree[] = 0; 183 _key[] = null; 184 } 185 186 /* ------------------------------------------------------------ */ 187 override 188 bool put(string s, V v) { 189 int t = 0; 190 size_t limit = s.length; 191 int last = 0; 192 for (size_t k = 0; k < limit; k++) { 193 byte c = s[k]; 194 if (isCaseInsensitive() && c < 128) 195 c = toLower(c); 196 197 while (true) { 198 int row = ROW_SIZE * t; 199 200 // Do we need to create the new row? 201 if (t == _rows) { 202 _rows++; 203 if (_rows >= _key.length) { 204 _rows--; 205 return false; 206 } 207 _tree[row] = c; 208 } 209 210 char n = _tree[row]; 211 int diff = n - c; 212 if (diff == 0) 213 t = _tree[last = (row + EQ)]; 214 else if (diff < 0) 215 t = _tree[last = (row + LO)]; 216 else 217 t = _tree[last = (row + HI)]; 218 219 // do we need a new row? 220 if (t == 0) { 221 t = _rows; 222 _tree[last] = cast(byte) t; 223 } 224 225 if (diff == 0) 226 break; 227 } 228 } 229 230 // Do we need to create the new row? 231 if (t == _rows) { 232 _rows++; 233 if (_rows >= _key.length) { 234 _rows--; 235 return false; 236 } 237 } 238 239 // Put the key and value 240 static if(is(V == class)) { 241 _key[t] = v is null ? null : s; 242 } else { 243 _key[t] = v == V.init ? null : s; 244 } 245 246 _value[t] = v; 247 248 return true; 249 } 250 251 252 /* ------------------------------------------------------------ */ 253 override 254 V get(string s, int offset, int len) { 255 int t = 0; 256 for (int i = 0; i < len; ) { 257 byte c = s.charAt(offset + i++); 258 if (isCaseInsensitive() && c < 128) 259 c = toLower(c); 260 261 while (true) { 262 int row = ROW_SIZE * t; 263 char n = _tree[row]; 264 int diff = n - c; 265 266 if (diff == 0) { 267 t = _tree[row + EQ]; 268 if (t == 0) 269 return V.init; 270 break; 271 } 272 273 t = _tree[row + hilo(diff)]; 274 if (t == 0) 275 return V.init; 276 } 277 } 278 279 return _value[t]; 280 } 281 282 283 override 284 V get(ByteBuffer b, int offset, int len) { 285 int t = 0; 286 offset += b.position(); 287 288 for (int i = 0; i < len; ) { 289 byte c = cast(byte) (b.get(offset + i++) & 0x7f); 290 if (isCaseInsensitive()) 291 c = cast(byte) toLower(c); 292 293 while (true) { 294 int row = ROW_SIZE * t; 295 char n = _tree[row]; 296 int diff = n - c; 297 298 if (diff == 0) { 299 t = _tree[row + EQ]; 300 if (t == 0) 301 return V.init; 302 break; 303 } 304 305 t = _tree[row + hilo(diff)]; 306 if (t == 0) 307 return V.init; 308 } 309 } 310 311 return _value[t]; 312 } 313 314 /* ------------------------------------------------------------ */ 315 override 316 V getBest(string s) { 317 return getBest(0, s, 0, cast(int)s.length); 318 } 319 320 /* ------------------------------------------------------------ */ 321 override 322 V getBest(string s, int offset, int length) { 323 return getBest(0, s, offset, length); 324 } 325 326 /* ------------------------------------------------------------ */ 327 private V getBest(int t, string s, int offset, int len) { 328 int node = t; 329 int end = offset + len; 330 loop: 331 while (offset < end) { 332 byte c = s.charAt(offset++); 333 len--; 334 if (isCaseInsensitive() && c < 128) 335 c = toLower(c); 336 337 while (true) { 338 int row = ROW_SIZE * t; 339 char n = _tree[row]; 340 int diff = n - c; 341 342 if (diff == 0) { 343 t = _tree[row + EQ]; 344 if (t == 0) 345 break loop; 346 347 // if this node is a match, recurse to remember 348 if (_key[t] !is null) { 349 node = t; 350 V best = getBest(t, s, offset, len); 351 static if(is(V == class)) { 352 if (best !is null) 353 return best; 354 } else { 355 if (best != V.init) 356 return best; 357 } 358 } 359 break; 360 } 361 362 t = _tree[row + hilo(diff)]; 363 if (t == 0) 364 break loop; 365 } 366 } 367 return _value[node]; 368 } 369 370 371 /* ------------------------------------------------------------ */ 372 override 373 V getBest(ByteBuffer b, int offset, int len) { 374 if (b.hasArray()) 375 return getBest(0, b.array(), b.arrayOffset() + b.position() + offset, len); 376 return getBest(0, b, offset, len); 377 } 378 379 /* ------------------------------------------------------------ */ 380 private V getBest(int t, byte[] b, int offset, int len) { 381 int node = t; 382 int end = offset + len; 383 loop: 384 while (offset < end) { 385 byte c = cast(byte) (b[offset++] & 0x7f); 386 len--; 387 if (isCaseInsensitive()) 388 c = cast(byte) toLower(c); 389 390 while (true) { 391 int row = ROW_SIZE * t; 392 char n = _tree[row]; 393 int diff = n - c; 394 395 if (diff == 0) { 396 t = _tree[row + EQ]; 397 if (t == 0) 398 break loop; 399 400 // if this node is a match, recurse to remember 401 if (_key[t] !is null) { 402 node = t; 403 V best = getBest(t, b, offset, len); 404 static if(is(V == class)) { 405 if (best !is null) 406 return best; 407 } else { 408 if (best != V.init) 409 return best; 410 } 411 } 412 break; 413 } 414 415 t = _tree[row + hilo(diff)]; 416 if (t == 0) 417 break loop; 418 } 419 } 420 return _value[node]; 421 } 422 423 /* ------------------------------------------------------------ */ 424 private V getBest(int t, ByteBuffer b, int offset, int len) { 425 int node = t; 426 int o = offset + b.position(); 427 428 loop: 429 for (int i = 0; i < len; i++) { 430 byte c = cast(byte) (b.get(o + i) & 0x7f); 431 if (isCaseInsensitive()) 432 c = cast(byte) toLower(c); 433 434 while (true) { 435 int row = ROW_SIZE * t; 436 char n = _tree[row]; 437 int diff = n - c; 438 439 if (diff == 0) { 440 t = _tree[row + EQ]; 441 if (t == 0) 442 break loop; 443 444 // if this node is a match, recurse to remember 445 if (_key[t] !is null) { 446 node = t; 447 V best = getBest(t, b, offset + i + 1, len - i - 1); 448 449 static if(is(V == class)) { 450 if (best !is null) 451 return best; 452 } else { 453 if (best != V.init) 454 return best; 455 } 456 } 457 break; 458 } 459 460 t = _tree[row + hilo(diff)]; 461 if (t == 0) 462 break loop; 463 } 464 } 465 return _value[node]; 466 } 467 468 override 469 string toString() { 470 StringBuilder buf = new StringBuilder(); 471 for (int r = 0; r <= _rows; r++) { 472 static if(is(V == class)) { 473 if (_key[r] !is null && _value[r] !is null) { 474 buf.append(','); 475 buf.append(_key[r]); 476 buf.append('='); 477 buf.append(_value[r].to!string()); 478 } 479 } else { 480 if (_key[r] !is null && _value[r] != V.init) { 481 buf.append(','); 482 buf.append(_key[r]); 483 buf.append('='); 484 buf.append(_value[r].to!string()); 485 } 486 } 487 } 488 if (buf.length() == 0) 489 return "{}"; 490 491 buf.setCharAt(0, '{'); 492 buf.append('}'); 493 return buf.toString(); 494 } 495 496 497 override 498 Set!(string) keySet() { 499 Set!(string) keys = new HashSet!string(); 500 501 for (int r = 0; r <= _rows; r++) { 502 static if(is(V == class)) { 503 if (_key[r] !is null && _value[r] !is null) 504 keys.add(_key[r]); 505 } else { 506 if (_key[r] !is null && _value[r] != V.init) 507 keys.add(_key[r]); 508 } 509 } 510 return keys; 511 } 512 513 int size() { 514 int s = 0; 515 for (int r = 0; r <= _rows; r++) { 516 517 static if(is(V == class)) { 518 if (_key[r] !is null && _value[r] !is null) 519 s++; 520 } else { 521 if (_key[r] !is null && _value[r] != V.init) 522 s++; 523 } 524 525 } 526 return s; 527 } 528 529 bool isEmpty() { 530 for (int r = 0; r <= _rows; r++) { 531 static if(is(V == class)) { 532 if (_key[r] !is null && _value[r] !is null) 533 return false; 534 } else { 535 if (_key[r] !is null && _value[r] != V.init) 536 return false; 537 } 538 } 539 return true; 540 } 541 542 543 // Set<Map.Entry<string, V>> entrySet() { 544 // Set<Map.Entry<string, V>> entries = new HashSet<>(); 545 // for (int r = 0; r <= _rows; r++) { 546 // if (_key[r] !is null && _value[r] !is null) 547 // entries.add(new AbstractMap.SimpleEntry<>(_key[r], _value[r])); 548 // } 549 // return entries; 550 // } 551 552 override 553 bool isFull() { 554 return _rows + 1 == _key.length; 555 } 556 557 static int hilo(int diff) { 558 // branchless equivalent to return ((diff<0)?LO:HI); 559 // return 3+2*((diff&Integer.MIN_VALUE)>>Integer.SIZE-1); 560 return 1 + (diff | int.max) / (int.max / 2); 561 } 562 563 void dump() { 564 import std.stdio; 565 for (int r = 0; r < _rows; r++) { 566 byte c = _tree[r * ROW_SIZE + 0]; 567 writefln("%4d [%s,%d,%d,%d] '%s':%s", 568 r, 569 (c < ' ' || c > 127) ? to!string(cast(int) c) : "'" ~ c ~ "'", 570 cast(int) _tree[r * ROW_SIZE + LO], 571 cast(int) _tree[r * ROW_SIZE + EQ], 572 cast(int) _tree[r * ROW_SIZE + HI], 573 _key[r], 574 _value[r]); 575 } 576 577 } 578 } 579 580 581 class Growing(V) : Trie!(V) { 582 private int _growby; 583 private ArrayTernaryTrie!(V) _trie; 584 585 this() { 586 this(1024, 1024); 587 } 588 589 this(int capacity, int growby) { 590 _growby = growby; 591 _trie = new ArrayTernaryTrie!V(capacity); 592 } 593 594 this(bool insensitive, int capacity, int growby) { 595 _growby = growby; 596 _trie = new ArrayTernaryTrie!V(insensitive, capacity); 597 } 598 599 bool put(V v) { 600 return put(v.toString(), v); 601 } 602 603 int hashCode() { 604 return _trie.hashCode(); 605 } 606 607 V remove(string s) { 608 return _trie.remove(s); 609 } 610 611 V get(string s) { 612 return _trie.get(s); 613 } 614 615 V get(ByteBuffer b) { 616 return _trie.get(b); 617 } 618 619 V getBest(byte[] b, int offset, int len) { 620 return _trie.getBest(b, offset, len); 621 } 622 623 bool isCaseInsensitive() { 624 return _trie.isCaseInsensitive(); 625 } 626 627 bool equals(Object obj) { 628 return _trie.equals(obj); 629 } 630 631 void clear() { 632 _trie.clear(); 633 } 634 635 bool put(string s, V v) { 636 bool added = _trie.put(s, v); 637 while (!added && _growby > 0) { 638 ArrayTernaryTrie!(V) bigger = new ArrayTernaryTrie!V(_trie._key.length + _growby); 639 foreach (string key, V value ; _trie) 640 bigger.put(key, value); 641 _trie = bigger; 642 added = _trie.put(s, v); 643 } 644 645 return added; 646 } 647 648 V get(string s, int offset, int len) { 649 return _trie.get(s, offset, len); 650 } 651 652 V get(ByteBuffer b, int offset, int len) { 653 return _trie.get(b, offset, len); 654 } 655 656 V getBest(string s) { 657 return _trie.getBest(s); 658 } 659 660 V getBest(string s, int offset, int length) { 661 return _trie.getBest(s, offset, length); 662 } 663 664 V getBest(ByteBuffer b, int offset, int len) { 665 return _trie.getBest(b, offset, len); 666 } 667 668 string toString() { 669 return _trie.toString(); 670 } 671 672 // Set!(string) keySet() { 673 // return _trie.keySet(); 674 // } 675 676 bool isFull() { 677 return false; 678 } 679 680 void dump() { 681 _trie.dump(); 682 } 683 684 bool isEmpty() { 685 return _trie.isEmpty(); 686 } 687 688 int size() { 689 return _trie.size(); 690 } 691 692 }