1 /** Simple associative array implementation for D (-betterC)
2 
3 The author of the original implementation: Martin Nowak
4 
5 Copyright:
6  Copyright (c) 2020, Ferhat Kurtulmuş.
7 
8  License:
9    $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 
11 Simplified betterC port of druntime/blob/master/src/rt/aaA.d
12 */
13 
14 module bcaa;
15 
16 version(LDC){
17     version(D_BetterC){
18         pragma(LDC_no_moduleinfo);
19     }
20 }
21 
22 import core.stdc.stdlib;
23 import core.stdc.string;
24 
25 // grow threshold
26 private enum GROW_NUM = 4;
27 private enum GROW_DEN = 5;
28 // shrink threshold
29 private enum SHRINK_NUM = 1;
30 private enum SHRINK_DEN = 8;
31 // grow factor
32 private enum GROW_FAC = 4;
33 // growing the AA doubles it's size, so the shrink threshold must be
34 // smaller than half the grow threshold to have a hysteresis
35 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
36 // initial load factor (for literals), mean of both thresholds
37 private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
38 private enum INIT_DEN = SHRINK_DEN * GROW_DEN;
39 
40 private enum INIT_NUM_BUCKETS = 8;
41 // magic hash constants to distinguish empty, deleted, and filled buckets
42 private enum HASH_EMPTY = 0;
43 private enum HASH_DELETED = 0x1;
44 private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
45 
46 private {
47     alias hash_t = size_t;
48 
49     struct KeyType(K){
50         alias Key = K;
51 
52         static hash_t getHash(scope const Key key) @nogc @safe nothrow pure {
53             return key.hashOf;
54         }
55 
56         static bool equals(scope const Key k1, scope const Key k2) @nogc nothrow pure {
57             static if(is(K == const(char)*)){
58                 return strlen(k1) == strlen(k2) &&
59                     strcmp(k1, k2) == 0;
60             } else {
61                 return k1 == k2;
62             }
63         }
64     }
65 }
66 
67 struct Bcaa(K, V){
68     
69     struct Node{
70         K key;
71         V val;
72     }
73 
74     struct Bucket {
75     private pure nothrow @nogc:
76         size_t hash;
77         Node* entry;
78         @property bool empty() const @nogc nothrow {
79             return hash == HASH_EMPTY;
80         }
81 
82         @property bool deleted() const @nogc nothrow {
83             return hash == HASH_DELETED;
84         }
85 
86         @property bool filled() const @safe @nogc nothrow {
87             return cast(ptrdiff_t) hash < 0;
88         }
89     }
90     
91     private Bucket[] buckets;
92 
93     uint firstUsed;
94     uint used;
95     uint deleted;
96 
97     alias TKey = KeyType!K;
98     TKey tkey;
99 
100     @property size_t length() const pure nothrow @nogc {
101         //assert(used >= deleted);
102         return used - deleted;
103     }
104 
105     private Bucket[] allocHtable(scope const size_t sz) @nogc nothrow {
106         Bucket[] _htable = (cast(Bucket*)malloc(sz * Bucket.sizeof))[0..sz];
107         _htable[] = Bucket.init;
108         return _htable;
109     }
110 
111     private void initTableIfNeeded() @nogc nothrow {
112         if(buckets is null){
113             buckets = allocHtable(INIT_NUM_BUCKETS);
114             firstUsed = cast(uint)INIT_NUM_BUCKETS;
115         }
116             
117     }
118     @property size_t dim() const pure nothrow @nogc {
119         return buckets.length;
120     }
121 
122     @property size_t mask() const pure nothrow @nogc {
123         return dim - 1;
124     }
125 
126     inout(Bucket)* findSlotInsert(const size_t hash) inout pure nothrow @nogc {
127         for (size_t i = hash & mask, j = 1;; ++j){
128             if (!buckets[i].filled)
129                 return &buckets[i];
130             i = (i + j) & mask;
131         }
132     }
133 
134     inout(Bucket)* findSlotLookup(size_t hash, scope const K key) inout nothrow @nogc {
135         for (size_t i = hash & mask, j = 1;; ++j){
136 
137             if (buckets[i].hash == hash && tkey.equals(key, buckets[i].entry.key)){
138                 return &buckets[i];
139             }
140                 
141             else if (buckets[i].empty)
142                 return null;
143             i = (i + j) & mask;
144         }
145     }
146 
147     void set(scope const K key, scope const V val) @nogc nothrow {
148         initTableIfNeeded();
149         
150         immutable keyHash = calcHash(key);
151 
152         if (auto p = findSlotLookup(keyHash, key)){
153             p.entry.val = cast(V)val;
154             return;
155         }
156 
157         auto p = findSlotInsert(keyHash);
158         
159         if (p.deleted)
160             --deleted;
161         
162         // check load factor and possibly grow
163         else if (++used * GROW_DEN > dim * GROW_NUM){
164             grow();
165             p = findSlotInsert(keyHash);
166             //assert(p.empty);
167         }
168         
169         // update search cache and allocate entry
170         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
171 
172         Node* newNode = cast(Node*)malloc(Node.sizeof);
173         newNode.key = key;
174         newNode.val = cast(V)val;
175 
176         p.hash = keyHash;
177         p.entry = newNode;
178     }
179 
180     private size_t calcHash(scope const K pkey) pure @nogc nothrow {
181         // highest bit is set to distinguish empty/deleted from filled buckets
182         immutable hash = tkey.getHash(pkey);
183         return mix(hash) | HASH_FILLED_MARK;
184     }
185     
186     void resize(const size_t sz) @nogc nothrow {
187         auto obuckets = buckets;
188         buckets = allocHtable(sz);
189 
190         foreach (ref b; obuckets[firstUsed .. $]){
191             if (b.filled)
192                 *findSlotInsert(b.hash) = b;
193             if (b.empty){
194                 core.stdc.stdlib.free(b.entry);
195                 b.entry = null;
196             }
197                 
198         }
199 
200         firstUsed = 0;
201         used -= deleted;
202         deleted = 0;
203 
204         core.stdc.stdlib.free(obuckets.ptr);
205     }
206 
207     void rehash() @nogc nothrow {
208         if (length)
209             resize(nextpow2(INIT_DEN * length / INIT_NUM));
210     }
211 
212     void grow() @nogc nothrow {
213         if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
214             resize(dim);
215         else
216             resize(GROW_FAC * dim);
217     }
218 
219     void shrink() @nogc nothrow {
220         if (dim > INIT_NUM_BUCKETS)
221             resize(dim / GROW_FAC);
222     }
223 
224     bool remove(scope const K key) @nogc nothrow {
225         if (!length)
226             return false;
227 
228         immutable hash = calcHash(key);
229         if (auto p = findSlotLookup(hash, key)){
230             // clear entry
231             p.hash = HASH_DELETED;
232             //core.stdc.stdlib.free(p.entry);
233             //p.entry = null;
234 
235             ++deleted;
236             if (length * SHRINK_DEN < dim * SHRINK_NUM)
237                 shrink();
238 
239             return true;
240         }
241         return false;
242     }
243 
244     V get(scope const K key) @nogc nothrow {
245         return opIndex(key);
246     }
247 
248     V opIndex(scope const K key) @nogc nothrow {
249         if(auto ret = opBinaryRight!"in"(key))
250             return *ret;
251         return V.init;
252     }
253 
254     void opIndexAssign(scope const V value, scope const K key) @nogc nothrow {
255         set(key, value);
256     }
257 
258     V* opBinaryRight(string op)(scope const K key) @nogc nothrow {
259         static if (op == "in"){
260             if (!length)
261                 return null;
262 
263             immutable keyHash = calcHash(key);
264             if (auto buck = findSlotLookup(keyHash, key))
265                 return &buck.entry.val;
266             return null;
267         } else
268         static assert(0, "Operator "~op~" not implemented");
269     }
270 
271     /// returning slice must be deallocated like free(keys.ptr);
272     K[] keys() @nogc nothrow {
273         K[] ks = (cast(K*)malloc(length * K.sizeof))[0..length];
274         size_t j;
275         foreach (ref b; buckets[firstUsed .. $]){
276             if (!b.filled){
277                 continue;
278             }
279             ks[j++] = b.entry.key;
280         }
281 
282         return ks;
283     }
284 
285     /// returning slice must be deallocated like free(values.ptr);
286     V[] values() @nogc nothrow {
287         V[] vals = (cast(V*)malloc(length * V.sizeof))[0..length];
288         size_t j;
289         foreach (ref b; buckets[firstUsed .. $]){
290             if (!b.filled){
291                 continue;
292             }
293             vals[j++] = b.entry.val;
294         }
295 
296         return vals;
297     }
298 
299     void clear() @nogc nothrow { // WIP 
300         /+ not sure if this works with this port
301         import core.stdc.string : memset;
302         // clear all data, but don't change bucket array length
303         memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
304         +/
305         // just loop over entire slice
306         foreach(ref b; buckets)
307             if(b.entry !is null){
308                 core.stdc.stdlib.free(b.entry);
309                 b.entry = null;
310             }
311         deleted = used = 0;
312         firstUsed = cast(uint) dim;
313         buckets[] = Bucket.init;
314     }
315 
316     void free() @nogc nothrow {
317         foreach(ref b; buckets)
318             if(b.entry !is null){
319                 core.stdc.stdlib.free(b.entry);
320                 b.entry = null;
321             }
322                 
323         core.stdc.stdlib.free(buckets.ptr);
324         deleted = used = 0;
325         buckets = null;
326     }
327 
328     Bcaa!(K, V) copy() @nogc nothrow {
329         auto new_buckets_ptr = cast(Bucket*)malloc(buckets.length * Bucket.sizeof);
330         memcpy(new_buckets_ptr, buckets.ptr, buckets.length * Bucket.sizeof);
331         Bcaa!(K, V) newAA;
332         newAA.buckets = new_buckets_ptr[0..buckets.length];
333         newAA.firstUsed = firstUsed;
334         newAA.used = used;
335         newAA.deleted = deleted;
336         return newAA;
337     }
338 
339     int opApply(int delegate(AAPair!(K, V)) @nogc nothrow dg) nothrow @nogc {
340         int result = 0;
341         if (buckets is null || buckets.length == 0)
342             return 0;
343         foreach (ref b; buckets[firstUsed .. $]){
344             if (!b.filled)
345                 continue;
346             result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val));
347             if (result) {
348                 break;
349             }
350         }
351         return 0;
352     }
353 
354     // for GC usages
355     int opApply(int delegate(AAPair!(K, V)) dg) {
356         int result = 0;
357         if (buckets is null || buckets.length == 0)
358             return 0;
359         foreach (ref b; buckets[firstUsed .. $]){
360             if (!b.filled)
361                 continue;
362             result = dg(AAPair!(K, V)(&b.entry.key, &b.entry.val));
363             if (result) {
364                 break;
365             }
366         }
367         return 0;
368     }
369     // TODO: .byKey(), .byValue(), .byKeyValue() ???
370 }
371 
372 struct AAPair(K, V) {
373     K* keyp;
374     V* valp;
375 }
376 
377 private size_t nextpow2(scope const size_t n) pure nothrow @nogc {
378     import core.bitop : bsr;
379 
380     if (!n)
381         return 1;
382 
383     const isPowerOf2 = !((n - 1) & n);
384     return 1 << (bsr(n) + !isPowerOf2);
385 }
386 
387 private size_t mix(size_t h) @safe pure nothrow @nogc {
388     enum m = 0x5bd1e995;
389     h ^= h >> 13;
390     h *= m;
391     h ^= h >> 15;
392     return h;
393 }
394 
395 private T min(T)(scope const T a, scope const T b) pure nothrow @nogc {
396     return a < b ? a : b;
397 }
398 
399 private T max(T)(scope const T a, scope const T b) pure nothrow @nogc {
400     return b < a ? a : b;
401 }
402 
403 unittest {
404     import core.stdc.stdio;
405     import core.stdc.time;
406 
407     clock_t begin = clock();
408 
409     Bcaa!(int, int) aa0;
410 
411     foreach (i; 0..1000000){
412         aa0[i] = i;
413     }
414 
415     foreach (i; 2000..1000000){
416         aa0.remove(i);
417     }
418 
419     printf("%d \n", aa0[1000]);
420     aa0.free;
421 
422     clock_t end = clock(); printf("Elapsed time: %f \n", cast(double)(end - begin) / CLOCKS_PER_SEC);
423 
424     Bcaa!(string, string) aa1;
425 
426     aa1["Stevie"] = "Ray Vaughan";
427     aa1["Asım Can"] = "Gündüz";
428     aa1["Dan"] = "Patlansky";
429     aa1["İlter"] = "Kurcala";
430     aa1["Ferhat"] = "Kurtulmuş";
431 
432     foreach(pair; aa1){
433         printf("%s -> %s", (*pair.keyp).ptr, (*pair.valp).ptr);
434     }
435 
436     if (auto valptr = "Dan" in aa1)
437         printf("%s exists!!!!\n", (*valptr).ptr );
438     else
439         printf("does not exist!!!!\n".ptr);
440 
441     assert(aa1.remove("Ferhat") == true);
442     assert(aa1["Ferhat"] == null);
443     assert(aa1.remove("Foe") == false);
444     assert(aa1["İlter"] =="Kurcala");
445 
446     aa1.rehash();
447 
448     printf("%s\n",aa1["Stevie"].ptr);
449     printf("%s\n",aa1["Asım Can"].ptr);
450     printf("%s\n",aa1["Dan"].ptr);
451     printf("%s\n",aa1["Ferhat"].ptr);
452 
453     auto keys = aa1.keys;
454     foreach(key; keys)
455         printf("%s -> %s \n", key.ptr, aa1[key].ptr);
456     core.stdc.stdlib.free(keys.ptr);
457     aa1.free;
458 
459     struct Guitar {
460         string brand;
461     }
462 
463     Bcaa!(int, Guitar) guitars;
464 
465     guitars[0] = Guitar("Fender");
466     guitars[3] = Guitar("Gibson");
467     guitars[356] = Guitar("Stagg");
468 
469     assert(guitars[3].brand == "Gibson");
470 
471     printf("%s \n", guitars[356].brand.ptr);
472 
473     if(auto valPtr = 3 in guitars)
474         printf("%s \n", (*valPtr).brand.ptr);
475 
476     guitars.free;
477 }
478 
479 // Test "in" works for AA without allocated storage.
480 unittest {
481     Bcaa!(int, int) emptyMap;
482     assert(0 !in emptyMap);
483 }