1 /** Simple associative array implementation for D (-betterC) 2 3 The author of the original implementation: Martin Nowak 4 5 Copyright: 6 Copyright (c) 2020, Ferhat Kurtulmuş. 7 8 License: 9 $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 10 11 Simplified betterC port of druntime/blob/master/src/rt/aaA.d 12 */ 13 14 module bcaa; 15 16 version(LDC){ 17 version(D_BetterC){ 18 pragma(LDC_no_moduleinfo); 19 } 20 } 21 22 //import std.experimental.allocator.common : stateSize; 23 24 import core.stdc.string; 25 private enum { 26 // grow threshold 27 GROW_NUM = 4, 28 GROW_DEN = 5, 29 // shrink threshold 30 SHRINK_NUM = 1, 31 SHRINK_DEN = 8, 32 // grow factor 33 GROW_FAC = 4 34 } 35 // growing the AA doubles it's size, so the shrink threshold must be 36 // smaller than half the grow threshold to have a hysteresis 37 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN); 38 39 private enum { 40 // initial load factor (for literals), mean of both thresholds 41 INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2, 42 INIT_DEN = SHRINK_DEN * GROW_DEN, 43 44 INIT_NUM_BUCKETS = 8, 45 // magic hash constants to distinguish empty, deleted, and filled buckets 46 HASH_EMPTY = 0, 47 HASH_DELETED = 0x1, 48 HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1 49 } 50 51 private { 52 alias hash_t = size_t; 53 54 enum isSomeString(T) = is(immutable T == immutable C[], C) && (is(C == char) || is(C == wchar) || is(C == dchar)); 55 56 template KeyType(K){ 57 alias Key = K; 58 59 @nogc nothrow pure: 60 hash_t getHash(scope const Key key) @safe { 61 return key.hashOf; 62 } 63 64 bool equals(scope const Key k1, scope const Key k2) { 65 static if(is(K == const(char)*)){ 66 return strlen(k1) == strlen(k2) && 67 strcmp(k1, k2) == 0; 68 } else static if(isSomeString!K){ 69 const len = k1.length; 70 return len == k2.length && strncmp(k1.ptr, k2.ptr, len) == 0; 71 } else { 72 return k1 == k2; 73 } 74 } 75 } 76 } 77 78 /// mallocator code BEGINS 79 80 // based on std.experimental.allocator.mallocator and 81 // https://github.com/submada/basic_string/blob/main/src/basic_string/package.d: 82 83 struct Mallocator { 84 //import std.experimental.allocator.common : platformAlignment; 85 import core.stdc.stdlib: malloc, realloc, free; 86 87 //enum uint alignment = platformAlignment; 88 89 static: 90 91 void[] allocate(size_t bytes) @trusted @nogc nothrow { 92 if (!bytes) return null; 93 auto p = malloc(bytes); 94 return p ? p[0 .. bytes] : null; 95 } 96 97 void deallocate(void[] b) @system @nogc nothrow { 98 free(b.ptr); 99 } 100 101 alias 102 deallocate = free, 103 dispose = deallocate; 104 105 bool reallocate(ref void[] b, size_t s) @system @nogc nothrow { 106 if (!s){ 107 // fuzzy area in the C standard, see http://goo.gl/ZpWeSE 108 // so just deallocate and nullify the pointer 109 deallocate(b); 110 b = null; 111 return true; 112 } 113 114 auto p = cast(ubyte*) realloc(b.ptr, s); 115 if (!p) return false; 116 b = p[0 .. s]; 117 return true; 118 } 119 120 Mallocator instance; 121 } 122 123 T[] makeArray(T, Allocator)(auto ref Allocator alloc, size_t length){ 124 return cast(T[])alloc.allocate(length * T.sizeof); 125 } 126 127 T* make(T, Allocator)(auto ref Allocator alloc){ 128 return cast(T*)alloc.allocate(T.sizeof).ptr; 129 } 130 131 /// mallocator code ENDS 132 133 struct Bcaa(K, V, Allocator = Mallocator) { 134 135 struct Node{ 136 K key; 137 V val; 138 139 alias value = val; 140 } 141 142 struct Bucket { 143 private pure nothrow @nogc @safe: 144 size_t hash; 145 Node* entry; 146 @property bool empty() const { 147 return hash == HASH_EMPTY; 148 } 149 150 @property bool deleted() const { 151 return hash == HASH_DELETED; 152 } 153 154 @property bool filled() const { 155 return cast(ptrdiff_t) hash < 0; 156 } 157 } 158 159 private: 160 Bucket[] buckets; 161 162 void allocHtable(size_t sz) @nogc nothrow { 163 auto _htable = allocator.makeArray!Bucket(sz); 164 _htable[] = Bucket.init; 165 buckets = _htable; 166 } 167 168 void initTableIfNeeded() @nogc nothrow { 169 if (buckets is null) { 170 allocHtable(INIT_NUM_BUCKETS); 171 firstUsed = INIT_NUM_BUCKETS; 172 } 173 } 174 175 public: 176 uint firstUsed; 177 uint used; 178 uint deleted; 179 180 alias TKey = KeyType!K; 181 182 alias allocator = Allocator.instance; 183 184 // for GC usages 185 // opApply will be deprecated. Use byKeyValue instead 186 int opApply(int delegate(AAPair!(K, V)) dg) { 187 if (buckets is null || buckets.length == 0) 188 return 0; 189 int result = 0; 190 foreach (ref b; buckets[firstUsed .. $]){ 191 if (!b.filled) 192 continue; 193 result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val)); 194 if (result) { 195 break; 196 } 197 } 198 return 0; 199 } 200 201 @nogc nothrow: 202 @property pure @safe { 203 size_t length() const 204 in(used >= deleted) { 205 return used - deleted; 206 } 207 208 size_t dim() const { 209 return buckets.length; 210 } 211 212 size_t mask() const { 213 return dim - 1; 214 } 215 } 216 217 inout(Bucket)* findSlotInsert(size_t hash) inout pure { 218 for (size_t i = hash & mask, j = 1;; ++j){ 219 if (!buckets[i].filled) 220 return &buckets[i]; 221 i = (i + j) & mask; 222 } 223 } 224 225 inout(Bucket)* findSlotLookup(size_t hash, scope const K key) inout { 226 for (size_t i = hash & mask, j = 1;; ++j){ 227 228 if (buckets[i].hash == hash && TKey.equals(key, buckets[i].entry.key)) 229 return &buckets[i]; 230 231 if (buckets[i].empty) 232 return null; 233 i = (i + j) & mask; 234 } 235 } 236 237 void set(scope const K key, scope const V val) { 238 initTableIfNeeded(); 239 240 const keyHash = calcHash(key); 241 242 if (auto p = findSlotLookup(keyHash, key)){ 243 p.entry.val = cast(V)val; 244 return; 245 } 246 247 auto p = findSlotInsert(keyHash); 248 249 if (p.deleted) 250 --deleted; 251 252 // check load factor and possibly grow 253 else if (++used * GROW_DEN > dim * GROW_NUM){ 254 grow(); 255 p = findSlotInsert(keyHash); 256 //assert(p.empty); 257 } 258 259 // update search cache and allocate entry 260 uint m = cast(uint)(p - buckets.ptr); 261 if (m < firstUsed) { 262 firstUsed = m; 263 } 264 265 p.hash = keyHash; 266 267 if (p.deleted){ 268 p.entry.key = key; 269 p.entry.val = cast(V)val; 270 } else { 271 Node* newNode = allocator.make!Node(); 272 newNode.key = key; 273 newNode.val = cast(V)val; 274 275 p.entry = newNode; 276 } 277 } 278 279 private size_t calcHash(scope const K pkey) pure { 280 // highest bit is set to distinguish empty/deleted from filled buckets 281 const hash = TKey.getHash(pkey); 282 return mix(hash) | HASH_FILLED_MARK; 283 } 284 285 void resize(size_t sz) { 286 auto obuckets = buckets; 287 allocHtable(sz); 288 289 foreach (ref b; obuckets[firstUsed .. $]){ 290 if (b.filled) 291 *findSlotInsert(b.hash) = b; 292 if (b.empty || b.deleted){ 293 allocator.dispose(b.entry); 294 295 b.entry = null; 296 } 297 298 } 299 300 firstUsed = 0; 301 used -= deleted; 302 deleted = 0; 303 304 allocator.dispose(obuckets.ptr); 305 } 306 307 void rehash() { 308 if (length) 309 resize(nextpow2(INIT_DEN * length / INIT_NUM)); 310 } 311 312 void grow() { 313 resize(length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM ? dim : GROW_FAC * dim); 314 } 315 316 void shrink() { 317 if (dim > INIT_NUM_BUCKETS) 318 resize(dim / GROW_FAC); 319 } 320 321 bool remove(scope const K key) { 322 if (!length) 323 return false; 324 325 const hash = calcHash(key); 326 if (auto p = findSlotLookup(hash, key)){ 327 // clear entry 328 p.hash = HASH_DELETED; 329 // just mark it to be disposed 330 331 ++deleted; 332 if (length * SHRINK_DEN < dim * SHRINK_NUM) 333 shrink(); 334 335 return true; 336 } 337 return false; 338 } 339 340 V get(scope const K key) { 341 if(auto ret = key in this) 342 return *ret; 343 return V.init; 344 } 345 346 alias opIndex = get; 347 348 void opIndexAssign(scope const V value, scope const K key) { 349 set(key, value); 350 } 351 352 static if(isSomeString!K) @property { 353 auto opDispatch(K key)() { 354 return opIndex(key); 355 } 356 357 auto opDispatch(K key)(scope const V value) { 358 return opIndexAssign(value, key); 359 } 360 } 361 362 V* opBinaryRight(string op : "in")(scope const K key) { 363 if (!length) 364 return null; 365 366 const keyHash = calcHash(key); 367 if (auto buck = findSlotLookup(keyHash, key)) 368 return &buck.entry.val; 369 return null; 370 } 371 372 /// returning slice must be deallocated like Allocator.dispose(keys); 373 // use byKeyValue to avoid extra allocations 374 K[] keys() { 375 K[] ks = allocator.makeArray!K(length); 376 size_t j; 377 foreach (ref b; buckets[firstUsed .. $]){ 378 if (b.filled){ 379 ks[j++] = b.entry.key; 380 } 381 } 382 383 return ks; 384 } 385 386 /// returning slice must be deallocated like Allocator.dispose(values); 387 // use byKeyValue to avoid extra allocations 388 V[] values() { 389 V[] vals = allocator.makeArray!V(length); 390 size_t j; 391 foreach (ref b; buckets[firstUsed .. $]){ 392 if (b.filled){ 393 vals[j++] = b.entry.val; 394 } 395 } 396 397 return vals; 398 } 399 400 void clear() { // WIP 401 /+ not sure if this works with this port 402 import core.stdc.string : memset; 403 // clear all data, but don't change bucket array length 404 memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof); 405 +/ 406 // just loop over entire slice 407 foreach(ref b; buckets) 408 if(b.entry){ 409 allocator.dispose(b.entry); 410 } 411 deleted = used = 0; 412 firstUsed = cast(uint) dim; 413 buckets[] = Bucket.init; 414 } 415 416 void free() { 417 foreach(ref b; buckets) 418 if(b.entry){ 419 allocator.dispose(b.entry); 420 } 421 422 allocator.dispose(buckets); 423 deleted = used = 0; 424 buckets = null; 425 } 426 427 auto copy() { 428 auto newBuckets = allocator.makeArray!Bucket(buckets.length); 429 memcpy(newBuckets.ptr, buckets.ptr, buckets.length * Bucket.sizeof); 430 typeof(this) newAA = { 431 newBuckets, firstUsed, used, deleted 432 }; 433 return newAA; 434 } 435 436 // opApply will be deprecated. Use byKeyValue instead 437 int opApply(int delegate(AAPair!(K, V)) @nogc nothrow dg) { 438 if (!buckets.length) 439 return 0; 440 int result = 0; 441 foreach (ref b; buckets[firstUsed .. $]){ 442 if (!b.filled) 443 continue; 444 result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val)); 445 if (result) { 446 break; 447 } 448 } 449 return 0; 450 } 451 452 struct BCAARange(alias rangeType) { 453 typeof(buckets) bucks; 454 size_t len; 455 size_t current; 456 457 nothrow @nogc: 458 459 bool empty() const pure @safe { 460 return len == 0; 461 } 462 463 // front must be called first before popFront 464 auto front(){ 465 while(bucks[current].hash <= 0) 466 ++current; 467 468 auto entry = bucks[current].entry; 469 mixin rangeType!entry; 470 return rangeType; 471 } 472 473 void popFront(){ 474 foreach (ref b; bucks[current .. $]){ 475 if (!b.empty){ 476 --len; 477 ++current; 478 break; 479 } 480 } 481 } 482 } 483 484 // The following functions return an InputRange 485 auto byKeyValue() { 486 auto rangeType(alias T) = T; 487 return BCAARange!rangeType(buckets, length, firstUsed); 488 } 489 490 auto byKey() { 491 auto rangeType(alias T) = T.key; 492 return BCAARange!rangeType(buckets, length, firstUsed); 493 } 494 495 auto byValue() { 496 auto rangeType(alias T) = T.val; 497 return BCAARange!rangeType(buckets, length, firstUsed); 498 } 499 } 500 501 struct AAPair(K, V) { 502 K* keyp; 503 V* valp; 504 } 505 506 private size_t nextpow2(size_t n) pure nothrow @nogc { 507 import core.bitop : bsr; 508 509 if (!n) 510 return 1; 511 512 const isPowerOf2 = !((n - 1) & n); 513 return 1 << (bsr(n) + !isPowerOf2); 514 } 515 516 private size_t mix(size_t h) @safe pure nothrow @nogc { 517 enum m = 0x5bd1e995; 518 h ^= h >> 13; 519 h *= m; 520 h ^= h >> 15; 521 return h; 522 } 523 524 unittest 525 { 526 Bcaa!(string, string) aa; 527 scope(exit) aa.free; 528 aa["foo".idup] = "bar"; 529 assert(aa.foo == "bar"); 530 } 531 532 nothrow @nogc: 533 unittest { 534 import core.stdc.stdio; 535 import core.stdc.time; 536 537 clock_t begin = clock(); 538 { 539 Bcaa!(int, int) aa0; 540 scope(exit) aa0.free; 541 542 foreach (i; 0..1000_000){ 543 aa0[i] = i; 544 } 545 546 foreach (i; 2000..1000_000){ 547 aa0.remove(i); 548 } 549 550 printf("%d\n", aa0[1000]); 551 } 552 clock_t end = clock(); printf("Elapsed time: %f \n", double(end - begin) / CLOCKS_PER_SEC); 553 554 { 555 Bcaa!(string, string) aa1; 556 scope(exit) aa1.free; 557 558 aa1["Stevie"] = "Ray Vaughan"; 559 aa1["Asım Can"] = "Gündüz"; 560 aa1["Dan"] = "Patlansky"; 561 aa1["İlter"] = "Kurcala"; 562 aa1.Ferhat = "Kurtulmuş"; 563 564 foreach(pair; aa1){ 565 printf("%s -> %s", (*pair.keyp).ptr, (*pair.valp).ptr); 566 } 567 568 if (auto valptr = "Dan" in aa1) 569 printf("%s exists!!!!\n", (*valptr).ptr ); 570 else 571 printf("does not exist!!!!\n"); 572 573 assert(aa1.remove("Ferhat") == true); 574 assert(aa1.Ferhat == null); 575 assert(aa1.remove("Foe") == false); 576 assert(aa1["İlter"] =="Kurcala"); 577 578 aa1.rehash(); 579 580 printf("%s\n",aa1["Stevie"].ptr); 581 printf("%s\n",aa1["Asım Can"].ptr); 582 printf("%s\n",aa1.Dan.ptr); 583 printf("%s\n",aa1["Ferhat"].ptr); 584 585 auto keys = aa1.keys; 586 scope(exit) aa1.allocator.dispose(keys); 587 foreach(key; keys) 588 printf("%s -> %s\n", key.ptr, aa1[key].ptr); 589 590 // byKey, byValue, and byKeyValue do not allocate 591 // They use the range magic of D 592 foreach (pp; aa1.byKeyValue()){ 593 printf("%s: %s\n", pp.key.ptr, pp.value.ptr); 594 595 } 596 597 struct Guitar { 598 string brand; 599 } 600 601 Bcaa!(int, Guitar) guitars; 602 scope(exit) guitars.free; 603 604 guitars[0] = Guitar("Fender"); 605 guitars[3] = Guitar("Gibson"); 606 guitars[356] = Guitar("Stagg"); 607 608 assert(guitars[3].brand == "Gibson"); 609 610 printf("%s\n", guitars[356].brand.ptr); 611 612 if(auto valPtr = 3 in guitars) 613 printf("%s\n", (*valPtr).brand.ptr); 614 } 615 } 616 617 unittest { 618 Bcaa!(string, int) aa; 619 scope(exit) aa.free; 620 aa.foo = 1; 621 aa.bar = 0; 622 assert("foo" in aa); 623 assert(aa.foo == 1); 624 625 aa.clear; 626 assert("foo" !in aa); 627 628 aa.bar = 2; 629 assert("bar" in aa); 630 assert(aa.bar == 2); 631 } 632 633 // Test "in" works for AA without allocated storage. 634 unittest { 635 Bcaa!(int, int) emptyMap; 636 assert(0 !in emptyMap); 637 } 638 639 // Try to force a memory leak - issue #5 640 unittest { 641 struct S { 642 int x, y; 643 string txt; 644 } 645 646 Bcaa!(int, S) aas; 647 scope(exit) aas.free; 648 649 for(int i = 1024; i < 2048; i++) { 650 aas[i] = S(i, i*2, "caca\0"); 651 } 652 aas[100] = S(10, 20, "caca\0"); 653 654 import core.stdc.stdio; 655 printf(".x=%d .y%d %s\n", aas[100].x, aas[100].y, aas[100].txt.ptr); 656 657 for(int i = 1024; i < 2048; i++) { 658 aas.remove(i); 659 } 660 }