1 /** Simple associative array implementation for D (-betterC)
2 
3 The author of the original implementation: Martin Nowak
4 
5 Copyright:
6  Copyright (c) 2020, Ferhat Kurtulmuş.
7 
8  License:
9    $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 
11 Simplified betterC port of druntime/blob/master/src/rt/aaA.d
12 */
13 
14 module bcaa;
15 
16 version(LDC){
17     pragma(LDC_no_moduleinfo);
18 }
19 
20 import core.stdc.stdlib;
21 import core.stdc.string;
22 
23 // grow threshold
24 private enum GROW_NUM = 4;
25 private enum GROW_DEN = 5;
26 // shrink threshold
27 private enum SHRINK_NUM = 1;
28 private enum SHRINK_DEN = 8;
29 // grow factor
30 private enum GROW_FAC = 4;
31 // growing the AA doubles it's size, so the shrink threshold must be
32 // smaller than half the grow threshold to have a hysteresis
33 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
34 // initial load factor (for literals), mean of both thresholds
35 private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
36 private enum INIT_DEN = SHRINK_DEN * GROW_DEN;
37 
38 private enum INIT_NUM_BUCKETS = 8;
39 // magic hash constants to distinguish empty, deleted, and filled buckets
40 private enum HASH_EMPTY = 0;
41 private enum HASH_DELETED = 0x1;
42 private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
43 
44 private {
45     alias hash_t = size_t;
46 
47     struct KeyType(K){
48         alias Key = K;
49 
50         static hash_t getHash(scope const Key key) @nogc @safe nothrow pure {
51             return key.hashOf;
52         }
53 
54         static bool equals(scope const Key k1, scope const Key k2) @nogc nothrow pure {
55             static if(is(K : int) || is(K : long)){
56                 return k1 == k2;
57             } else
58             static if(is(K == string)){
59                 return k1.length == k2.length &&
60                     memcmp(k1.ptr, k2.ptr, k1.length) == 0;
61             } else
62             static if(is(K == const(char)*)){
63                 return strlen(k1) == strlen(k2) &&
64                     strcmp(k1, k2) == 0;
65             } else
66             static assert(false, "Unsupported key type!");
67         }
68     }
69 }
70 
71 struct Bcaa(K, V){
72     
73     struct Node{
74         K key;
75         V val;
76     }
77 
78     struct Bucket {
79     private pure nothrow @nogc:
80         size_t hash;
81         Node* entry;
82         @property bool empty() const @nogc nothrow {
83             return hash == HASH_EMPTY;
84         }
85 
86         @property bool deleted() const @nogc nothrow {
87             return hash == HASH_DELETED;
88         }
89 
90         @property bool filled() const @safe @nogc nothrow {
91             return cast(ptrdiff_t) hash < 0;
92         }
93     }
94     
95     private Bucket[] buckets;
96 
97     uint firstUsed;
98     uint used;
99     uint deleted;
100 
101     alias TKey = KeyType!K;
102     TKey tkey;
103 
104     @property size_t length() const pure nothrow @nogc {
105         //assert(used >= deleted);
106         return used - deleted;
107     }
108 
109     private Bucket[] allocHtable(scope const size_t sz) @nogc nothrow {
110         Bucket[] _htable = (cast(Bucket*)malloc(sz * Bucket.sizeof))[0..sz];
111         _htable[] = Bucket.init;
112         return _htable;
113     }
114 
115     private void initTableIfNeeded() @nogc nothrow {
116         if(buckets is null){
117             buckets = allocHtable(INIT_NUM_BUCKETS);
118             firstUsed = cast(uint)INIT_NUM_BUCKETS;
119         }
120             
121     }
122     @property size_t dim() const pure nothrow @nogc {
123         return buckets.length;
124     }
125 
126     @property size_t mask() const pure nothrow @nogc {
127         return dim - 1;
128     }
129 
130     inout(Bucket)* findSlotInsert(const size_t hash) inout pure nothrow @nogc {
131         for (size_t i = hash & mask, j = 1;; ++j){
132             if (!buckets[i].filled)
133                 return &buckets[i];
134             i = (i + j) & mask;
135         }
136     }
137 
138     inout(Bucket)* findSlotLookup(size_t hash, scope const K key) inout nothrow @nogc {
139         for (size_t i = hash & mask, j = 1;; ++j){
140 
141             if (buckets[i].hash == hash && tkey.equals(key, buckets[i].entry.key)){
142                 return &buckets[i];
143             }
144                 
145             else if (buckets[i].empty)
146                 return null;
147             i = (i + j) & mask;
148         }
149     }
150 
151     void set(scope const K key, scope const V val) @nogc nothrow {
152         initTableIfNeeded();
153         
154         immutable keyHash = calcHash(key);
155 
156         if (auto p = findSlotLookup(keyHash, key)){
157             p.entry.val = val;
158             return;
159         }
160 
161         auto p = findSlotInsert(keyHash);
162         
163         if (p.deleted)
164             --deleted;
165         
166         // check load factor and possibly grow
167         else if (++used * GROW_DEN > dim * GROW_NUM){
168             grow();
169             p = findSlotInsert(keyHash);
170             //assert(p.empty);
171         }
172         
173         // update search cache and allocate entry
174         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
175 
176         Node* newNode = cast(Node*)malloc(Node.sizeof);
177         newNode.key = key;
178         newNode.val = val;
179 
180         p.hash = keyHash;
181         p.entry = newNode;
182     }
183 
184     private size_t calcHash(scope const K pkey) pure @nogc nothrow {
185         // highest bit is set to distinguish empty/deleted from filled buckets
186         immutable hash = tkey.getHash(pkey);
187         return mix(hash) | HASH_FILLED_MARK;
188     }
189     
190     void resize(const size_t sz) @nogc nothrow {
191         auto obuckets = buckets;
192         buckets = allocHtable(sz);
193 
194         foreach (ref b; obuckets[firstUsed .. $]){
195             if (b.filled)
196                 *findSlotInsert(b.hash) = b;
197             if (b.empty){
198                 core.stdc.stdlib.free(b.entry);
199                 b.entry = null;
200             }
201                 
202         }
203 
204         firstUsed = 0;
205         used -= deleted;
206         deleted = 0;
207 
208         core.stdc.stdlib.free(obuckets.ptr);
209     }
210 
211     void rehash() @nogc nothrow {
212         if (length)
213             resize(nextpow2(INIT_DEN * length / INIT_NUM));
214     }
215 
216     void grow() @nogc nothrow {
217         if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
218             resize(dim);
219         else
220             resize(GROW_FAC * dim);
221     }
222 
223     void shrink() @nogc nothrow {
224         if (dim > INIT_NUM_BUCKETS)
225             resize(dim / GROW_FAC);
226     }
227 
228     bool remove(scope const K key) @nogc nothrow {
229         if (!length)
230             return false;
231 
232         immutable hash = calcHash(key);
233         if (auto p = findSlotLookup(hash, key)){
234             // clear entry
235             p.hash = HASH_DELETED;
236             //core.stdc.stdlib.free(p.entry);
237             //p.entry = null;
238 
239             ++deleted;
240             if (length * SHRINK_DEN < dim * SHRINK_NUM)
241                 shrink();
242 
243             return true;
244         }
245         return false;
246     }
247 
248     V get(scope const K key) @nogc nothrow {
249         return opIndex(key);
250     }
251 
252     V opIndex(scope const K key) @nogc nothrow {
253         if(auto ret = opBinaryRight!"in"(key))
254             return *ret;
255         return V.init;
256     }
257 
258     void opIndexAssign(scope const V value, scope const K key) @nogc nothrow {
259         set(key, value);
260     }
261 
262     V* opBinaryRight(string op)(scope const K key) @nogc nothrow {
263         static if (op == "in"){
264             immutable keyHash = calcHash(key);
265             if (auto buck = findSlotLookup(keyHash, key))
266                 return &buck.entry.val;
267             return null;
268         } else
269         static assert(0, "Operator "~op~" not implemented");
270     }
271 
272     /// returning slice must be deallocated like free(keys.ptr);
273     K[] keys() @nogc nothrow {
274         K[] ks = (cast(K*)malloc(length * K.sizeof))[0..length];
275         size_t j;
276         foreach(i; 0..buckets.length){
277             auto buck = buckets[i];
278             if (!buck.filled){
279                 continue;
280             }
281             Node* tmp = buck.entry;
282             ks[j++] = tmp.key;
283         }
284 
285         return ks;
286     }
287 
288     /// returning slice must be deallocated like free(values.ptr);
289     V[] values() @nogc nothrow {
290         V[] vals = (cast(V*)malloc(length * V.sizeof))[0..length];
291         size_t j;
292         foreach(i; 0..buckets.length){
293             auto buck = buckets[i];
294             if (!buck.filled){
295                 continue;
296             }
297             Node* tmp = buck.entry;
298             vals[j++] = tmp.val;
299         }
300 
301         return vals;
302     }
303 
304     void clear() @nogc nothrow { // WIP 
305         /+ not sure if this works with this port
306         import core.stdc.string : memset;
307         // clear all data, but don't change bucket array length
308         memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
309         +/
310         // just loop over entire slice
311         foreach(ref b; buckets)
312             if(b.entry !is null){
313                 core.stdc.stdlib.free(b.entry);
314                 b.entry = null;
315             }
316         deleted = used = 0;
317         firstUsed = cast(uint) dim;
318         buckets[] = Bucket.init;
319     }
320 
321     void free() @nogc nothrow {
322         foreach(ref b; buckets)
323             if(b.entry !is null){
324                 core.stdc.stdlib.free(b.entry);
325                 b.entry = null;
326             }
327                 
328         core.stdc.stdlib.free(buckets.ptr);
329         deleted = used = 0;
330         buckets = null;
331     }
332 
333     // TODO: .byKey(), .byValue(), .byKeyValue()
334 }
335 
336 private size_t nextpow2(scope const size_t n) pure nothrow @nogc {
337     import core.bitop : bsr;
338 
339     if (!n)
340         return 1;
341 
342     const isPowerOf2 = !((n - 1) & n);
343     return 1 << (bsr(n) + !isPowerOf2);
344 }
345 
346 private size_t mix(size_t h) @safe pure nothrow @nogc {
347     enum m = 0x5bd1e995;
348     h ^= h >> 13;
349     h *= m;
350     h ^= h >> 15;
351     return h;
352 }
353 
354 private T min(T)(scope const T a, scope const T b) pure nothrow @nogc {
355     return a < b ? a : b;
356 }
357 
358 private T max(T)(scope const T a, scope const T b) pure nothrow @nogc {
359     return b < a ? a : b;
360 }
361 
362 unittest {
363     import core.stdc.stdio;
364     import core.stdc.time;
365 
366     clock_t begin = clock();
367 
368     Bcaa!(int, int) aa0;
369 
370     foreach (i; 0..1000000){
371         aa0[i] = i;
372     }
373 
374     foreach (i; 2000..1000000){
375         aa0.remove(i);
376     }
377 
378     printf("%d \n", aa0[1000]);
379     aa0.free;
380 
381     clock_t end = clock(); printf("Elapsed time: %f \n", cast(double)(end - begin) / CLOCKS_PER_SEC);
382 
383     Bcaa!(string, string) aa1;
384 
385     aa1["Stevie"] = "Ray Vaughan";
386     aa1["Asım Can"] = "Gündüz";
387     aa1["Dan"] = "Patlansky";
388     aa1["İlter"] = "Kurcala";
389     aa1["Ferhat"] = "Kurtulmuş";
390 
391     if (auto valptr = "Dan" in aa1)
392         printf("%s exists!!!!\n", (*valptr).ptr );
393     else
394         printf("does not exist!!!!\n".ptr);
395 
396     assert(aa1.remove("Ferhat") == true);
397     assert(aa1["Ferhat"] == null);
398     assert(aa1.remove("Foe") == false);
399     assert(aa1["İlter"] =="Kurcala");
400 
401     aa1.rehash();
402 
403     printf("%s\n",aa1["Stevie"].ptr);
404     printf("%s\n",aa1["Asım Can"].ptr);
405     printf("%s\n",aa1["Dan"].ptr);
406     printf("%s\n",aa1["Ferhat"].ptr);
407 
408     auto keys = aa1.keys;
409     foreach(key; keys)
410         printf("%s -> %s \n", key.ptr, aa1[key].ptr);
411     core.stdc.stdlib.free(keys.ptr);
412     aa1.free;
413 
414     struct Guitar {
415         string brand;
416     }
417 
418     Bcaa!(int, Guitar) guitars;
419 
420     guitars[0] = Guitar("Fender");
421     guitars[3] = Guitar("Gibson");
422     guitars[356] = Guitar("Stagg");
423 
424     assert(guitars[3].brand == "Gibson");
425 
426     printf("%s \n", guitars[356].brand.ptr);
427 
428     if(auto valPtr = 3 in guitars)
429         printf("%s \n", (*valPtr).brand.ptr);
430 
431     guitars.free;
432 }