1 /** Simple associative array implementation for D (-betterC)
2 
3 The author of the original implementation: Martin Nowak
4 
5 Copyright:
6  Copyright (c) 2020, Ferhat Kurtulmuş.
7 
8  License:
9    $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 
11 Simplified betterC port of druntime/blob/master/src/rt/aaA.d
12 */
13 
14 module bcaa;
15 
16 version(LDC){
17     version(D_BetterC){
18         pragma(LDC_no_moduleinfo);
19     }
20 }
21 
22 //import std.experimental.allocator.common : stateSize;
23 
24 import core.stdc.string;
25 private enum {
26     // grow threshold
27     GROW_NUM = 4,
28     GROW_DEN = 5,
29     // shrink threshold
30     SHRINK_NUM = 1,
31     SHRINK_DEN = 8,
32     // grow factor
33     GROW_FAC = 4
34 }
35 // growing the AA doubles it's size, so the shrink threshold must be
36 // smaller than half the grow threshold to have a hysteresis
37 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
38 
39 private enum {
40     // initial load factor (for literals), mean of both thresholds
41     INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2,
42     INIT_DEN = SHRINK_DEN * GROW_DEN,
43 
44     INIT_NUM_BUCKETS = 8,
45     // magic hash constants to distinguish empty, deleted, and filled buckets
46     HASH_EMPTY = 0,
47     HASH_DELETED = 0x1,
48     HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1
49 }
50 
51 private {
52     alias hash_t = size_t;
53 
54     enum isSomeString(T) = is(immutable T == immutable C[], C) && (is(C == char) || is(C == wchar) || is(C == dchar));
55 
56     template KeyType(K){
57         alias Key = K;
58 
59         @nogc nothrow pure:
60         hash_t getHash(scope const Key key) @safe {
61             return key.hashOf;
62         }
63 
64         bool equals(scope const Key k1, scope const Key k2) {
65             static if(is(K == const(char)*)){
66                 return strlen(k1) == strlen(k2) &&
67                     strcmp(k1, k2) == 0;
68             } else static if(isSomeString!K){
69                 const len = k1.length;
70                 return len == k2.length && strncmp(k1.ptr, k2.ptr, len) == 0;
71             } else {
72                 return k1 == k2;
73             }
74         }
75     }
76 }
77 
78 /// mallocator code BEGINS
79 
80 // based on std.experimental.allocator.mallocator and
81 // https://github.com/submada/basic_string/blob/main/src/basic_string/package.d:
82 
83 struct Mallocator {
84     //import std.experimental.allocator.common : platformAlignment;
85     import core.stdc.stdlib: malloc, realloc, free;
86 
87     //enum uint alignment = platformAlignment;
88 
89 static:
90 
91     void[] allocate(size_t bytes) @trusted @nogc nothrow {
92         if (!bytes) return null;
93         auto p = malloc(bytes);
94         return p ? p[0 .. bytes] : null;
95     }
96 
97     void deallocate(void[] b) @system @nogc nothrow {
98         free(b.ptr);
99     }
100 
101     alias
102         deallocate = free,
103         dispose = deallocate;
104 
105     bool reallocate(ref void[] b, size_t s) @system @nogc nothrow {
106         if (!s){
107             // fuzzy area in the C standard, see http://goo.gl/ZpWeSE
108             // so just deallocate and nullify the pointer
109             deallocate(b);
110             b = null;
111             return true;
112         }
113 
114         auto p = cast(ubyte*) realloc(b.ptr, s);
115         if (!p) return false;
116         b = p[0 .. s];
117         return true;
118     }
119 
120     Mallocator instance;
121 }
122 
123 T[] makeArray(T, Allocator)(auto ref Allocator alloc, size_t length){
124     return cast(T[])alloc.allocate(length * T.sizeof);
125 }
126 
127 T* make(T, Allocator)(auto ref Allocator alloc){
128     return cast(T*)alloc.allocate(T.sizeof).ptr;
129 }
130 
131 /// mallocator code ENDS
132 
133 struct Bcaa(K, V, Allocator = Mallocator) {
134 
135     struct Node{
136         K key;
137         V val;
138 
139         alias value = val;
140     }
141 
142     struct Bucket {
143     private pure nothrow @nogc @safe:
144         size_t hash;
145         Node* entry;
146         @property bool empty() const {
147             return hash == HASH_EMPTY;
148         }
149 
150         @property bool deleted() const {
151             return hash == HASH_DELETED;
152         }
153 
154         @property bool filled() const {
155             return cast(ptrdiff_t) hash < 0;
156         }
157     }
158 
159 private:
160     Bucket[] buckets;
161 
162     void allocHtable(size_t sz) @nogc nothrow {
163         auto _htable = allocator.makeArray!Bucket(sz);
164         _htable[] = Bucket.init;
165         buckets = _htable;
166     }
167 
168     void initTableIfNeeded() @nogc nothrow {
169         if (buckets is null) {
170             allocHtable(INIT_NUM_BUCKETS);
171             firstUsed = INIT_NUM_BUCKETS;
172         }
173     }
174 
175 public:
176     uint firstUsed;
177     uint used;
178     uint deleted;
179 
180     alias TKey = KeyType!K;
181 
182     alias allocator = Allocator.instance;
183 
184     // for GC usages
185     // opApply will be deprecated. Use byKeyValue instead
186     int opApply(int delegate(AAPair!(K, V)) dg) {
187         if (buckets is null || buckets.length == 0)
188             return 0;
189         int result = 0;
190         foreach (ref b; buckets[firstUsed .. $]){
191             if (!b.filled)
192                 continue;
193             result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val));
194             if (result) {
195                 break;
196             }
197         }
198         return 0;
199     }
200 
201 @nogc nothrow:
202     @property pure @safe {
203         size_t length() const
204         in(used >= deleted) {
205             return used - deleted;
206         }
207 
208         size_t dim() const {
209             return buckets.length;
210         }
211 
212         size_t mask() const {
213             return dim - 1;
214         }
215     }
216 
217     inout(Bucket)* findSlotInsert(size_t hash) inout pure {
218         for (size_t i = hash & mask, j = 1;; ++j){
219             if (!buckets[i].filled)
220                 return &buckets[i];
221             i = (i + j) & mask;
222         }
223     }
224 
225     inout(Bucket)* findSlotLookup(size_t hash, scope const K key) inout {
226         for (size_t i = hash & mask, j = 1;; ++j){
227 
228             if (buckets[i].hash == hash && TKey.equals(key, buckets[i].entry.key))
229                 return &buckets[i];
230 
231             if (buckets[i].empty)
232                 return null;
233             i = (i + j) & mask;
234         }
235     }
236 
237     void set(scope const K key, scope const V val) {
238         initTableIfNeeded();
239 
240         const keyHash = calcHash(key);
241 
242         if (auto p = findSlotLookup(keyHash, key)){
243             p.entry.val = cast(V)val;
244             return;
245         }
246 
247         auto p = findSlotInsert(keyHash);
248 
249         if (p.deleted)
250             --deleted;
251 
252         // check load factor and possibly grow
253         else if (++used * GROW_DEN > dim * GROW_NUM){
254             grow();
255             p = findSlotInsert(keyHash);
256             //assert(p.empty);
257         }
258 
259         // update search cache and allocate entry
260         uint m = cast(uint)(p - buckets.ptr);
261         if (m < firstUsed) {
262             firstUsed = m;
263         }
264 
265         p.hash = keyHash;
266 
267         if (p.deleted){
268             p.entry.key = key;
269             p.entry.val = cast(V)val;
270         } else {
271             Node* newNode = allocator.make!Node();
272             newNode.key = key;
273             newNode.val = cast(V)val;
274 
275             p.entry = newNode;
276         }
277     }
278 
279     private size_t calcHash(scope const K pkey) pure {
280         // highest bit is set to distinguish empty/deleted from filled buckets
281         const hash = TKey.getHash(pkey);
282         return mix(hash) | HASH_FILLED_MARK;
283     }
284 
285     void resize(size_t sz) {
286         auto obuckets = buckets;
287         allocHtable(sz);
288 
289         foreach (ref b; obuckets[firstUsed .. $]){
290             if (b.filled)
291                 *findSlotInsert(b.hash) = b;
292             if (b.empty || b.deleted){
293                 allocator.dispose(b.entry);
294 
295                 b.entry = null;
296             }
297 
298         }
299 
300         firstUsed = 0;
301         used -= deleted;
302         deleted = 0;
303 
304         allocator.dispose(obuckets.ptr);
305     }
306 
307     void rehash() {
308         if (length)
309             resize(nextpow2(INIT_DEN * length / INIT_NUM));
310     }
311 
312     void grow() {
313         resize(length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM ? dim : GROW_FAC * dim);
314     }
315 
316     void shrink() {
317         if (dim > INIT_NUM_BUCKETS)
318             resize(dim / GROW_FAC);
319     }
320 
321     bool remove(scope const K key) {
322         if (!length)
323             return false;
324 
325         const hash = calcHash(key);
326         if (auto p = findSlotLookup(hash, key)){
327             // clear entry
328             p.hash = HASH_DELETED;
329             // just mark it to be disposed
330 
331             ++deleted;
332             if (length * SHRINK_DEN < dim * SHRINK_NUM)
333                 shrink();
334 
335             return true;
336         }
337         return false;
338     }
339 
340     V get(scope const K key) {
341         if(auto ret = key in this)
342             return *ret;
343         return V.init;
344     }
345 
346     alias opIndex = get;
347 
348     void opIndexAssign(scope const V value, scope const K key) {
349         set(key, value);
350     }
351 
352     static if(isSomeString!K) @property {
353         auto opDispatch(K key)() {
354             return opIndex(key);
355         }
356 
357         auto opDispatch(K key)(scope const V value) {
358             return opIndexAssign(value, key);
359         }
360     }
361 
362     V* opBinaryRight(string op : "in")(scope const K key) {
363         if (!length)
364             return null;
365 
366         const keyHash = calcHash(key);
367         if (auto buck = findSlotLookup(keyHash, key))
368             return &buck.entry.val;
369         return null;
370     }
371 
372     /// returning slice must be deallocated like Allocator.dispose(keys);
373     // use byKeyValue to avoid extra allocations
374     K[] keys() {
375         K[] ks = allocator.makeArray!K(length);
376         size_t j;
377         foreach (ref b; buckets[firstUsed .. $]){
378             if (b.filled){
379                 ks[j++] = b.entry.key;
380             }
381         }
382 
383         return ks;
384     }
385 
386     /// returning slice must be deallocated like Allocator.dispose(values);
387     // use byKeyValue to avoid extra allocations
388     V[] values() {
389         V[] vals = allocator.makeArray!V(length);
390         size_t j;
391         foreach (ref b; buckets[firstUsed .. $]){
392             if (b.filled){
393                 vals[j++] = b.entry.val;
394             }
395         }
396 
397         return vals;
398     }
399 
400     void clear() { // WIP
401         /+ not sure if this works with this port
402         import core.stdc.string : memset;
403         // clear all data, but don't change bucket array length
404         memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
405         +/
406         // just loop over entire slice
407         foreach(ref b; buckets)
408             if(b.entry){
409                 allocator.dispose(b.entry);
410             }
411         deleted = used = 0;
412         firstUsed = cast(uint) dim;
413         buckets[] = Bucket.init;
414     }
415 
416     void free() {
417         foreach(ref b; buckets)
418             if(b.entry){
419                 allocator.dispose(b.entry);
420             }
421 
422         allocator.dispose(buckets);
423         deleted = used = 0;
424         buckets = null;
425     }
426 
427     auto copy() {
428         auto newBuckets = allocator.makeArray!Bucket(buckets.length);
429         memcpy(newBuckets.ptr, buckets.ptr, buckets.length * Bucket.sizeof);
430         typeof(this) newAA = {
431             newBuckets, firstUsed, used, deleted
432         };
433         return newAA;
434     }
435 
436     // opApply will be deprecated. Use byKeyValue instead
437     int opApply(int delegate(AAPair!(K, V)) @nogc nothrow dg) {
438         if (!buckets.length)
439             return 0;
440         int result = 0;
441         foreach (ref b; buckets[firstUsed .. $]){
442             if (!b.filled)
443                 continue;
444             result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val));
445             if (result) {
446                 break;
447             }
448         }
449         return 0;
450     }
451 
452     struct BCAARange(alias rangeType) {
453         typeof(buckets) bucks;
454         size_t len;
455         size_t current;
456 
457         nothrow @nogc:
458 
459         bool empty() const pure @safe {
460             return len == 0;
461         }
462 
463         // front must be called first before popFront
464         auto front(){
465             while(bucks[current].hash <= 0)
466                 ++current;
467 
468             auto entry = bucks[current].entry;
469             mixin rangeType!entry;
470             return rangeType;
471         }
472 
473         void popFront(){
474             foreach (ref b; bucks[current .. $]){
475                 if (!b.empty){
476                     --len;
477                     ++current;
478                     break;
479                 }
480             }
481         }
482     }
483 
484     // The following functions return an InputRange
485     auto byKeyValue() {
486         auto rangeType(alias T) = T;
487         return BCAARange!rangeType(buckets, length, firstUsed);
488     }
489 
490     auto byKey() {
491         auto rangeType(alias T) = T.key;
492         return BCAARange!rangeType(buckets, length, firstUsed);
493     }
494 
495     auto byValue() {
496         auto rangeType(alias T) = T.val;
497         return BCAARange!rangeType(buckets, length, firstUsed);
498     }
499 }
500 
501 struct AAPair(K, V) {
502     K* keyp;
503     V* valp;
504 }
505 
506 private size_t nextpow2(size_t n) pure nothrow @nogc {
507     import core.bitop : bsr;
508 
509     if (!n)
510         return 1;
511 
512     const isPowerOf2 = !((n - 1) & n);
513     return 1 << (bsr(n) + !isPowerOf2);
514 }
515 
516 private size_t mix(size_t h) @safe pure nothrow @nogc {
517     enum m = 0x5bd1e995;
518     h ^= h >> 13;
519     h *= m;
520     h ^= h >> 15;
521     return h;
522 }
523 
524 unittest
525 {
526     Bcaa!(string, string) aa;
527     scope(exit) aa.free;
528     aa["foo".idup] = "bar";
529     assert(aa.foo == "bar");
530 }
531 
532 nothrow @nogc:
533 unittest {
534     import core.stdc.stdio;
535     import core.stdc.time;
536 
537     clock_t begin = clock();
538     {
539         Bcaa!(int, int) aa0;
540         scope(exit) aa0.free;
541 
542         foreach (i; 0..1000_000){
543             aa0[i] = i;
544         }
545 
546         foreach (i; 2000..1000_000){
547             aa0.remove(i);
548         }
549 
550         printf("%d\n", aa0[1000]);
551     }
552     clock_t end = clock(); printf("Elapsed time: %f \n", double(end - begin) / CLOCKS_PER_SEC);
553 
554     {
555         Bcaa!(string, string) aa1;
556         scope(exit) aa1.free;
557 
558         aa1["Stevie"] = "Ray Vaughan";
559         aa1["Asım Can"] = "Gündüz";
560         aa1["Dan"] = "Patlansky";
561         aa1["İlter"] = "Kurcala";
562         aa1.Ferhat = "Kurtulmuş";
563 
564         foreach(pair; aa1){
565             printf("%s -> %s", (*pair.keyp).ptr, (*pair.valp).ptr);
566         }
567 
568         if (auto valptr = "Dan" in aa1)
569             printf("%s exists!!!!\n", (*valptr).ptr );
570         else
571             printf("does not exist!!!!\n");
572 
573         assert(aa1.remove("Ferhat") == true);
574         assert(aa1.Ferhat == null);
575         assert(aa1.remove("Foe") == false);
576         assert(aa1["İlter"] =="Kurcala");
577 
578         aa1.rehash();
579 
580         printf("%s\n",aa1["Stevie"].ptr);
581         printf("%s\n",aa1["Asım Can"].ptr);
582         printf("%s\n",aa1.Dan.ptr);
583         printf("%s\n",aa1["Ferhat"].ptr);
584 
585         auto keys = aa1.keys;
586         scope(exit) aa1.allocator.dispose(keys);
587         foreach(key; keys)
588             printf("%s -> %s\n", key.ptr, aa1[key].ptr);
589 
590         // byKey, byValue, and byKeyValue do not allocate
591         // They use the range magic of D
592         foreach (pp; aa1.byKeyValue()){
593             printf("%s: %s\n", pp.key.ptr, pp.value.ptr);
594 
595         }
596 
597         struct Guitar {
598             string brand;
599         }
600 
601         Bcaa!(int, Guitar) guitars;
602         scope(exit) guitars.free;
603 
604         guitars[0] = Guitar("Fender");
605         guitars[3] = Guitar("Gibson");
606         guitars[356] = Guitar("Stagg");
607 
608         assert(guitars[3].brand == "Gibson");
609 
610         printf("%s\n", guitars[356].brand.ptr);
611 
612         if(auto valPtr = 3 in guitars)
613             printf("%s\n", (*valPtr).brand.ptr);
614     }
615 }
616 
617 unittest {
618     Bcaa!(string, int) aa;
619     scope(exit) aa.free;
620     aa.foo = 1;
621     aa.bar = 0;
622     assert("foo" in aa);
623     assert(aa.foo == 1);
624 
625     aa.clear;
626     assert("foo" !in aa);
627 
628     aa.bar = 2;
629     assert("bar" in aa);
630     assert(aa.bar == 2);
631 }
632 
633 // Test "in" works for AA without allocated storage.
634 unittest {
635     Bcaa!(int, int) emptyMap;
636     assert(0 !in emptyMap);
637 }
638 
639 // Try to force a memory leak - issue #5
640 unittest {
641     struct S {
642         int x, y;
643         string txt;
644     }
645 
646     Bcaa!(int, S) aas;
647     scope(exit) aas.free;
648 
649     for(int i = 1024; i < 2048; i++) {
650         aas[i] = S(i, i*2, "caca\0");
651     }
652     aas[100] = S(10, 20, "caca\0");
653 
654     import core.stdc.stdio;
655     printf(".x=%d .y%d %s\n", aas[100].x, aas[100].y, aas[100].txt.ptr);
656 
657     for(int i = 1024; i < 2048; i++) {
658         aas.remove(i);
659     }
660 }