1 /** Simple associative array implementation for D (-betterC)
2 
3 The author of the original implementation: Martin Nowak
4 
5 Copyright:
6  Copyright (c) 2020, Ferhat Kurtulmuş.
7 
8  License:
9    $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 
11 Simplified betterC port of druntime/blob/master/src/rt/aaA.d
12 */
13 
14 module bcaa;
15 
16 version(LDC){
17     version(D_BetterC){
18         pragma(LDC_no_moduleinfo);
19     }
20 }
21 
22 //import std.experimental.allocator.common : stateSize;
23 
24 import core.stdc.string;
25 
26 // grow threshold
27 private enum GROW_NUM = 4;
28 private enum GROW_DEN = 5;
29 // shrink threshold
30 private enum SHRINK_NUM = 1;
31 private enum SHRINK_DEN = 8;
32 // grow factor
33 private enum GROW_FAC = 4;
34 // growing the AA doubles it's size, so the shrink threshold must be
35 // smaller than half the grow threshold to have a hysteresis
36 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
37 // initial load factor (for literals), mean of both thresholds
38 private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
39 private enum INIT_DEN = SHRINK_DEN * GROW_DEN;
40 
41 private enum INIT_NUM_BUCKETS = 8;
42 // magic hash constants to distinguish empty, deleted, and filled buckets
43 private enum HASH_EMPTY = 0;
44 private enum HASH_DELETED = 0x1;
45 private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
46 
47 private {
48     alias hash_t = size_t;
49 
50     struct KeyType(K){
51         alias Key = K;
52 
53         static hash_t getHash(scope const Key key) @nogc @safe nothrow pure {
54             return key.hashOf;
55         }
56 
57         static bool equals(scope const Key k1, scope const Key k2) @nogc nothrow pure {
58             static if(is(K == const(char)*)){
59                 return strlen(k1) == strlen(k2) &&
60                     strcmp(k1, k2) == 0;
61             } else static if(is(K == string)){
62                 return strlen(k1.ptr) == strlen(k2.ptr) &&
63                     strcmp(k1.ptr, k2.ptr) == 0;
64             } else {
65                 return k1 == k2;
66             }
67         }
68     }
69 }
70 
71 /// mallocator code BEGINS
72 
73 // based on std.experimental.allocator.mallocator and 
74 // https://github.com/submada/basic_string/blob/main/src/basic_string/package.d:
75 
76 struct Mallocator{
77 	//import std.experimental.allocator.common : platformAlignment;
78     import core.stdc.stdlib: malloc, realloc, free;
79 
80 	//enum uint alignment = platformAlignment;
81 
82 	static void[] allocate(size_t bytes)@trusted @nogc nothrow {
83 		if (!bytes) return null;
84 		auto p = malloc(bytes);
85 		return p ? p[0 .. bytes] : null;
86 	}
87 
88 	static bool deallocate(void[] b)@system @nogc nothrow {
89 		free(b.ptr);
90 		return true;
91 	}
92 
93 	static bool reallocate(ref void[] b, size_t s)@system @nogc nothrow {
94 		if (!s){
95 			// fuzzy area in the C standard, see http://goo.gl/ZpWeSE
96 			// so just deallocate and nullify the pointer
97 			deallocate(b);
98 			b = null;
99 			return true;
100 		}
101 
102 		auto p = cast(ubyte*) realloc(b.ptr, s);
103 		if (!p) return false;
104 		b = p[0 .. s];
105 		return true;
106 	}
107 
108 	static Mallocator instance;
109 }
110 
111 T[] makeArray(T, Allocator)(auto ref Allocator alloc, size_t length){
112     return cast(T[])alloc.allocate(length * T.sizeof);
113 }
114 
115 T* make(T, Allocator)(auto ref Allocator alloc){
116     return cast(T*)(alloc.allocate(T.sizeof).ptr);
117 }
118 
119 void dispose(A, T)(auto ref A alloc, auto ref T[] array){
120     alloc.deallocate(cast(void[])array);
121 }
122 
123 void dispose(A, T)(auto ref A alloc, auto ref T* p){
124     alloc.deallocate(p[0..1]);
125 }
126 
127 /// mallocator code ENDS
128 
129 struct Bcaa(K, V, Allocator = Mallocator) {
130 
131     struct Node{
132         K key;
133         V val;
134     }
135 
136     struct Bucket {
137     private pure nothrow @nogc:
138         size_t hash;
139         Node* entry;
140         @property bool empty() const @nogc nothrow {
141             return hash == HASH_EMPTY;
142         }
143 
144         @property bool deleted() const @nogc nothrow {
145             return hash == HASH_DELETED;
146         }
147 
148         @property bool filled() const @safe @nogc nothrow {
149             return cast(ptrdiff_t) hash < 0;
150         }
151     }
152 
153     private Bucket[] buckets;
154 
155     uint firstUsed;
156     uint used;
157     uint deleted;
158 
159     alias TKey = KeyType!K;
160     TKey tkey;
161     
162     /+ causes linker errors
163 
164     //enum bool hasStatelessAllocator = (stateSize!Allocator == 0);
165     static if (hasStatelessAllocator)
166     {
167         alias allocator = Allocator.instance;
168 
169     } else {
170         private Allocator allocator;
171 
172         /// No default construction if an allocator must be provided.
173         this() @disable;
174 
175         /**
176         * Use the given `allocator` for allocations.
177         */
178         this(Allocator allocator)
179         in
180         {
181           assert(allocator !is null, "Allocator must not be null");
182         }
183         do
184         {
185           this.allocator = allocator;
186         }
187     }
188     +/
189     alias allocator = Allocator.instance;
190 
191     @property size_t length() const pure nothrow @nogc {
192         //assert(used >= deleted);
193         return used - deleted;
194     }
195 
196     private Bucket[] allocHtable(scope const size_t sz) @nogc nothrow {
197         Bucket[] _htable = allocator.makeArray!(Bucket)(sz);
198         _htable[] = Bucket.init;
199         return _htable;
200     }
201 
202     private void initTableIfNeeded() @nogc nothrow {
203         if(buckets is null){
204             buckets = allocHtable(INIT_NUM_BUCKETS);
205             firstUsed = cast(uint)INIT_NUM_BUCKETS;
206         }
207 
208     }
209     @property size_t dim() const pure nothrow @nogc {
210         return buckets.length;
211     }
212 
213     @property size_t mask() const pure nothrow @nogc {
214         return dim - 1;
215     }
216 
217     inout(Bucket)* findSlotInsert(const size_t hash) inout pure nothrow @nogc {
218         for (size_t i = hash & mask, j = 1;; ++j){
219             if (!buckets[i].filled)
220                 return &buckets[i];
221             i = (i + j) & mask;
222         }
223     }
224 
225     inout(Bucket)* findSlotLookup(size_t hash, scope const K key) inout nothrow @nogc {
226         for (size_t i = hash & mask, j = 1;; ++j){
227 
228             if (buckets[i].hash == hash && tkey.equals(key, buckets[i].entry.key)){
229                 return &buckets[i];
230             }
231 
232             else if (buckets[i].empty)
233                 return null;
234             i = (i + j) & mask;
235         }
236     }
237 
238     void set(scope const K key, scope const V val) @nogc nothrow {
239         initTableIfNeeded();
240 
241         immutable keyHash = calcHash(key);
242 
243         if (auto p = findSlotLookup(keyHash, key)){
244             p.entry.val = cast(V)val;
245             return;
246         }
247 
248         auto p = findSlotInsert(keyHash);
249 
250         if (p.deleted)
251             --deleted;
252 
253         // check load factor and possibly grow
254         else if (++used * GROW_DEN > dim * GROW_NUM){
255             grow();
256             p = findSlotInsert(keyHash);
257             //assert(p.empty);
258         }
259 
260         // update search cache and allocate entry
261         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
262 
263         p.hash = keyHash;
264 
265         if(!p.deleted){
266             Node* newNode = allocator.make!Node();
267             newNode.key = key;
268             newNode.val = cast(V)val;
269 
270             p.entry = newNode;
271         }else{
272             p.entry.key = key;
273             p.entry.val = cast(V)val;
274         }
275     }
276 
277     private size_t calcHash(scope const K pkey) pure @nogc nothrow {
278         // highest bit is set to distinguish empty/deleted from filled buckets
279         immutable hash = tkey.getHash(pkey);
280         return mix(hash) | HASH_FILLED_MARK;
281     }
282 
283     void resize(const size_t sz) @nogc nothrow {
284         auto obuckets = buckets;
285         buckets = allocHtable(sz);
286 
287         foreach (ref b; obuckets[firstUsed .. $]){
288             if (b.filled)
289                 *findSlotInsert(b.hash) = b;
290             if (b.empty || b.deleted){
291                 allocator.dispose(b.entry);
292                 //core.stdc.stdlib.free(b.entry);
293                 b.entry = null;
294             }
295 
296         }
297 
298         firstUsed = 0;
299         used -= deleted;
300         deleted = 0;
301 
302         //core.stdc.stdlib.free(obuckets.ptr);
303         allocator.dispose(obuckets.ptr);
304     }
305 
306     void rehash() @nogc nothrow {
307         if (length)
308             resize(nextpow2(INIT_DEN * length / INIT_NUM));
309     }
310 
311     void grow() @nogc nothrow {
312         if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
313             resize(dim);
314         else
315             resize(GROW_FAC * dim);
316     }
317 
318     void shrink() @nogc nothrow {
319         if (dim > INIT_NUM_BUCKETS)
320             resize(dim / GROW_FAC);
321     }
322 
323     bool remove(scope const K key) @nogc nothrow {
324         if (!length)
325             return false;
326 
327         immutable hash = calcHash(key);
328         if (auto p = findSlotLookup(hash, key)){
329             // clear entry
330             p.hash = HASH_DELETED;
331             //core.stdc.stdlib.free(p.entry);
332             //p.entry = null;
333 
334             ++deleted;
335             if (length * SHRINK_DEN < dim * SHRINK_NUM)
336                 shrink();
337 
338             return true;
339         }
340         return false;
341     }
342 
343     V get(scope const K key) @nogc nothrow {
344         return opIndex(key);
345     }
346 
347     V opIndex(scope const K key) @nogc nothrow {
348         if(auto ret = opBinaryRight!"in"(key))
349             return *ret;
350         return V.init;
351     }
352 
353     void opIndexAssign(scope const V value, scope const K key) @nogc nothrow {
354         set(key, value);
355     }
356 
357     V* opBinaryRight(string op)(scope const K key) @nogc nothrow {
358         static if (op == "in"){
359             if (!length)
360                 return null;
361 
362             immutable keyHash = calcHash(key);
363             if (auto buck = findSlotLookup(keyHash, key))
364                 return &buck.entry.val;
365             return null;
366         } else
367         static assert(0, "Operator "~op~" not implemented");
368     }
369 
370     /// returning slice must be deallocated like free(keys.ptr);
371     K[] keys() @nogc nothrow {
372         K[] ks = allocator.makeArray!K(length);
373         //(cast(K*)malloc(length * K.sizeof))[0..length];
374         size_t j;
375         foreach (ref b; buckets[firstUsed .. $]){
376             if (!b.filled){
377                 continue;
378             }
379             ks[j++] = b.entry.key;
380         }
381 
382         return ks;
383     }
384 
385     /// returning slice must be deallocated like free(values.ptr);
386     V[] values() @nogc nothrow {
387         V[] vals = allocator.makeArray!V(length);
388         //(cast(V*)malloc(length * V.sizeof))[0..length];
389         size_t j;
390         foreach (ref b; buckets[firstUsed .. $]){
391             if (!b.filled){
392                 continue;
393             }
394             vals[j++] = b.entry.val;
395         }
396 
397         return vals;
398     }
399 
400     void clear() @nogc nothrow { // WIP
401         /+ not sure if this works with this port
402         import core.stdc.string : memset;
403         // clear all data, but don't change bucket array length
404         memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
405         +/
406         // just loop over entire slice
407         foreach(ref b; buckets)
408             if(b.entry !is null){
409                 //core.stdc.stdlib.free(b.entry);
410                 allocator.dispose(b.entry);
411                 b.entry = null;
412             }
413         deleted = used = 0;
414         firstUsed = cast(uint) dim;
415         buckets[] = Bucket.init;
416     }
417 
418     void free() @nogc nothrow {
419         foreach(ref b; buckets)
420             if(b.entry !is null){
421                 //core.stdc.stdlib.free(b.entry);
422                 allocator.dispose(b.entry);
423                 b.entry = null;
424             }
425 
426         //core.stdc.stdlib.free(buckets.ptr);
427         allocator.dispose(buckets);
428         deleted = used = 0;
429         buckets = null;
430     }
431 
432     auto copy() @nogc nothrow {
433         auto new_buckets = allocator.makeArray!Bucket(buckets.length);
434         //cast(Bucket*)malloc(buckets.length * Bucket.sizeof);
435         memcpy(new_buckets.ptr, buckets.ptr, buckets.length * Bucket.sizeof);
436         typeof(this) newAA;
437         newAA.buckets = new_buckets[0..buckets.length];
438         newAA.firstUsed = firstUsed;
439         newAA.used = used;
440         newAA.deleted = deleted;
441         return newAA;
442     }
443 
444     int opApply(int delegate(AAPair!(K, V)) @nogc nothrow dg) nothrow @nogc {
445         int result = 0;
446         if (buckets is null || buckets.length == 0)
447             return 0;
448         foreach (ref b; buckets[firstUsed .. $]){
449             if (!b.filled)
450                 continue;
451             result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val));
452             if (result) {
453                 break;
454             }
455         }
456         return 0;
457     }
458 
459     // for GC usages
460     int opApply(int delegate(AAPair!(K, V)) dg) {
461         int result = 0;
462         if (buckets is null || buckets.length == 0)
463             return 0;
464         foreach (ref b; buckets[firstUsed .. $]){
465             if (!b.filled)
466                 continue;
467             result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val));
468             if (result) {
469                 break;
470             }
471         }
472         return 0;
473     }
474     // TODO: .byKey(), .byValue(), .byKeyValue() ???
475 }
476 
477 struct AAPair(K, V) {
478     K* keyp;
479     V* valp;
480 }
481 
482 private size_t nextpow2(scope const size_t n) pure nothrow @nogc {
483     import core.bitop : bsr;
484 
485     if (!n)
486         return 1;
487 
488     const isPowerOf2 = !((n - 1) & n);
489     return 1 << (bsr(n) + !isPowerOf2);
490 }
491 
492 private size_t mix(size_t h) @safe pure nothrow @nogc {
493     enum m = 0x5bd1e995;
494     h ^= h >> 13;
495     h *= m;
496     h ^= h >> 15;
497     return h;
498 }
499 
500 private T min(T)(scope const T a, scope const T b) pure nothrow @nogc {
501     return a < b ? a : b;
502 }
503 
504 private T max(T)(scope const T a, scope const T b) pure nothrow @nogc {
505     return b < a ? a : b;
506 }
507 
508 @nogc unittest {
509     import core.stdc.stdio;
510     import core.stdc.time;
511 
512     clock_t begin = clock();
513     {
514         Bcaa!(int, int) aa0;
515         scope(exit) aa0.free;
516 
517         foreach (i; 0..1000_000){
518             aa0[i] = i;
519         }
520 
521         foreach (i; 2000..1000_000){
522             aa0.remove(i);
523         }
524 
525         printf("%d \n", aa0[1000]);
526     }
527     clock_t end = clock(); printf("Elapsed time: %f \n", cast(double)(end - begin) / CLOCKS_PER_SEC);
528 
529     {
530         Bcaa!(string, string) aa1;
531         scope(exit) aa1.free;
532 
533         aa1["Stevie"] = "Ray Vaughan";
534         aa1["Asım Can"] = "Gündüz";
535         aa1["Dan"] = "Patlansky";
536         aa1["İlter"] = "Kurcala";
537         aa1["Ferhat"] = "Kurtulmuş";
538 
539         foreach(pair; aa1){
540             printf("%s -> %s", (*pair.keyp).ptr, (*pair.valp).ptr);
541         }
542 
543         if (auto valptr = "Dan" in aa1)
544             printf("%s exists!!!!\n", (*valptr).ptr );
545         else
546             printf("does not exist!!!!\n".ptr);
547 
548         assert(aa1.remove("Ferhat") == true);
549         assert(aa1["Ferhat"] == null);
550         assert(aa1.remove("Foe") == false);
551         assert(aa1["İlter"] =="Kurcala");
552 
553         aa1.rehash();
554 
555         printf("%s\n",aa1["Stevie"].ptr);
556         printf("%s\n",aa1["Asım Can"].ptr);
557         printf("%s\n",aa1["Dan"].ptr);
558         printf("%s\n",aa1["Ferhat"].ptr);
559 
560         auto keys = aa1.keys;
561         scope(exit) Mallocator.instance.dispose(keys);
562         foreach(key; keys)
563             printf("%s -> %s \n", key.ptr, aa1[key].ptr);
564         //core.stdc.stdlib.free(keys.ptr);
565 
566         struct Guitar {
567             string brand;
568         }
569 
570         Bcaa!(int, Guitar) guitars;
571         scope(exit) guitars.free;
572 
573         guitars[0] = Guitar("Fender");
574         guitars[3] = Guitar("Gibson");
575         guitars[356] = Guitar("Stagg");
576 
577         assert(guitars[3].brand == "Gibson");
578 
579         printf("%s \n", guitars[356].brand.ptr);
580 
581         if(auto valPtr = 3 in guitars)
582             printf("%s \n", (*valPtr).brand.ptr);
583     }
584 }
585 
586 // Test "in" works for AA without allocated storage.
587 @nogc unittest {
588     Bcaa!(int, int) emptyMap;
589     assert(0 !in emptyMap);
590 
591 }
592 
593 // Try to force a memory leak - issue #5
594 @nogc unittest {
595     struct S {
596         int x;
597         int y;
598         string txt;
599     }
600 
601     Bcaa!(int, S) aas;
602     scope(exit) aas.free;
603 
604     for(int i = 1024; i < 2048; i++) {
605         aas[i] = S(i, i*2, "caca\0");
606     }
607     aas[100] = S(10, 20, "caca\0");
608 
609     import core.stdc.stdio;
610     printf(".x=%d .y%d %s\n", aas[100].x, aas[100].y, aas[100].txt.ptr);
611 
612     for(int i = 1024; i < 2048; i++) {
613         aas.remove(i);
614     }
615 }
616