1 /** Simple associative array implementation for D (-betterC)
2 
3 The author of the original implementation: Martin Nowak
4 
5 Copyright:
6  Copyright (c) 2020, Ferhat Kurtulmuş.
7 
8  License:
9    $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 
11 Simplified betterC port of druntime/blob/master/src/rt/aaA.d
12 */
13 
14 module bcaa;
15 
16 version(LDC){
17     pragma(LDC_no_moduleinfo);
18 }
19 
20 import core.stdc.stdlib;
21 import core.stdc.string;
22 
23 // grow threshold
24 private enum GROW_NUM = 4;
25 private enum GROW_DEN = 5;
26 // shrink threshold
27 private enum SHRINK_NUM = 1;
28 private enum SHRINK_DEN = 8;
29 // grow factor
30 private enum GROW_FAC = 4;
31 // growing the AA doubles it's size, so the shrink threshold must be
32 // smaller than half the grow threshold to have a hysteresis
33 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
34 // initial load factor (for literals), mean of both thresholds
35 private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
36 private enum INIT_DEN = SHRINK_DEN * GROW_DEN;
37 
38 private enum INIT_NUM_BUCKETS = 8;
39 // magic hash constants to distinguish empty, deleted, and filled buckets
40 private enum HASH_EMPTY = 0;
41 private enum HASH_DELETED = 0x1;
42 private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
43 
44 private {
45     alias hash_t = size_t;
46 
47     struct KeyType(K){
48         alias Key = K;
49 
50         static hash_t getHash(scope const Key key) @nogc @safe nothrow pure {
51             return key.hashOf;
52         }
53 
54         static bool equals(scope const Key k1, scope const Key k2) @nogc nothrow pure {
55             static if(is(K : int)){
56                 return k1 == k2;
57             } else
58             static if(is(K == string)){
59                 return k1.length == k2.length &&
60                     memcmp(k1.ptr, k2.ptr, k1.length) == 0;
61             } else
62             static assert(false, "Unsupported key type!");
63         }
64     }
65 }
66 
67 struct Bcaa(K, V){
68     
69     struct Node{
70         K key;
71         V val;
72     }
73 
74     struct Bucket {
75     private pure nothrow @nogc:
76         size_t hash;
77         Node* entry;
78         @property bool empty() const @nogc nothrow {
79             return hash == HASH_EMPTY;
80         }
81 
82         @property bool deleted() const @nogc nothrow {
83             return hash == HASH_DELETED;
84         }
85 
86         @property bool filled() const @safe @nogc nothrow {
87             return cast(ptrdiff_t) hash < 0;
88         }
89     }
90     
91     private Bucket[] buckets;
92 
93     uint firstUsed;
94     uint used;
95     uint deleted;
96 
97     alias TKey = KeyType!K;
98     TKey tkey;
99 
100     @property size_t length() const pure nothrow @nogc {
101         //assert(used >= deleted);
102         return used - deleted;
103     }
104 
105     private Bucket[] allocHtable(scope const size_t sz) @nogc nothrow {
106         Bucket[] _htable = (cast(Bucket*)malloc(sz * Bucket.sizeof))[0..sz];
107         _htable[] = Bucket.init;
108         return _htable;
109     }
110 
111     private void initTableIfNeeded() @nogc nothrow {
112         if(buckets is null){
113             buckets = allocHtable(INIT_NUM_BUCKETS);
114             firstUsed = cast(uint)INIT_NUM_BUCKETS;
115         }
116             
117     }
118     @property size_t dim() const pure nothrow @nogc {
119         return buckets.length;
120     }
121 
122     @property size_t mask() const pure nothrow @nogc {
123         return dim - 1;
124     }
125 
126     inout(Bucket)* findSlotInsert(const size_t hash) inout pure nothrow @nogc {
127         for (size_t i = hash & mask, j = 1;; ++j){
128             if (!buckets[i].filled)
129                 return &buckets[i];
130             i = (i + j) & mask;
131         }
132     }
133 
134     inout(Bucket)* findSlotLookup(size_t hash, scope const K key) inout nothrow @nogc {
135         for (size_t i = hash & mask, j = 1;; ++j){
136 
137             if (buckets[i].hash == hash && tkey.equals(key, buckets[i].entry.key)){
138                 return &buckets[i];
139             }
140                 
141             else if (buckets[i].empty)
142                 return null;
143             i = (i + j) & mask;
144         }
145     }
146 
147     void set(scope const K key, scope const V val) @nogc nothrow {
148         initTableIfNeeded();
149         
150         immutable keyHash = calcHash(key);
151 
152         if (auto p = findSlotLookup(keyHash, key)){
153             p.entry.val = val;
154             return;
155         }
156 
157         auto p = findSlotInsert(keyHash);
158         
159         if (p.deleted)
160             --deleted;
161         
162         // check load factor and possibly grow
163         else if (++used * GROW_DEN > dim * GROW_NUM){
164             grow();
165             p = findSlotInsert(keyHash);
166             //assert(p.empty);
167         }
168         
169         // update search cache and allocate entry
170         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
171 
172         Node* newNode = cast(Node*)malloc(Node.sizeof);
173         newNode.key = key;
174         newNode.val = val;
175 
176         p.hash = keyHash;
177         p.entry = newNode;
178     }
179 
180     private size_t calcHash(scope const K pkey) pure @nogc nothrow {
181         // highest bit is set to distinguish empty/deleted from filled buckets
182         immutable hash = tkey.getHash(pkey);
183         return mix(hash) | HASH_FILLED_MARK;
184     }
185     
186     void resize(const size_t sz) @nogc nothrow {
187         auto obuckets = buckets;
188         buckets = allocHtable(sz);
189 
190         foreach (ref b; obuckets[firstUsed .. $]){
191             if (b.filled)
192                 *findSlotInsert(b.hash) = b;
193             if (b.empty){
194                 core.stdc.stdlib.free(b.entry);
195                 b.entry = null;
196             }
197                 
198         }
199 
200         firstUsed = 0;
201         used -= deleted;
202         deleted = 0;
203 
204         core.stdc.stdlib.free(obuckets.ptr);
205     }
206 
207     void rehash() @nogc nothrow {
208         if (!length)
209             resize(nextpow2(INIT_DEN * length / INIT_NUM));
210     }
211 
212     void grow() @nogc nothrow {
213         if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
214             resize(dim);
215         else
216             resize(GROW_FAC * dim);
217     }
218 
219     void shrink() @nogc nothrow {
220         if (dim > INIT_NUM_BUCKETS)
221             resize(dim / GROW_FAC);
222     }
223 
224     bool remove(scope const K key) @nogc nothrow {
225         if (!length)
226             return false;
227 
228         immutable hash = calcHash(key);
229         if (auto p = findSlotLookup(hash, key)){
230             // clear entry
231             p.hash = HASH_DELETED;
232             //core.stdc.stdlib.free(p.entry);
233             //p.entry = null;
234 
235             ++deleted;
236             if (length * SHRINK_DEN < dim * SHRINK_NUM)
237                 shrink();
238 
239             return true;
240         }
241         return false;
242     }
243 
244     V get(scope const K key) @nogc nothrow {
245         return opIndex(key);
246     }
247 
248     V opIndex(scope const K key) @nogc nothrow {
249         if(auto ret = opBinaryRight!"in"(key))
250             return *ret;
251         return V.init;
252     }
253 
254     void opIndexAssign(scope const V value, scope const K key) @nogc nothrow {
255         set(key, value);
256     }
257 
258     V* opBinaryRight(string op)(scope const K key) @nogc nothrow {
259         static if (op == "in"){
260             immutable keyHash = calcHash(key);
261             if (auto buck = findSlotLookup(keyHash, key))
262                 return &buck.entry.val;
263             return null;
264         } else
265         static assert(0, "Operator "~op~" not implemented");
266     }
267 
268     /// returning slice must be deallocated like free(keys.ptr);
269     K[] keys() @nogc nothrow {
270         K[] ks = (cast(K*)malloc(length * K.sizeof))[0..length];
271         size_t j;
272         foreach(i; 0..buckets.length){
273             auto buck = buckets[i];
274             if (!buck.filled){
275                 continue;
276             }
277             Node* tmp = buck.entry;
278             ks[j++] = tmp.key;
279         }
280 
281         return ks;
282     }
283 
284     /// returning slice must be deallocated like free(values.ptr);
285     V[] values() @nogc nothrow {
286         V[] vals = (cast(V*)malloc(length * V.sizeof))[0..length];
287         size_t j;
288         foreach(i; 0..buckets.length){
289             auto buck = buckets[i];
290             if (!buck.filled){
291                 continue;
292             }
293             Node* tmp = buck.entry;
294             vals[j++] = tmp.val;
295         }
296 
297         return vals;
298     }
299 
300     void clear() @nogc nothrow { // WIP 
301         /+ not sure if this works with this port
302         import core.stdc.string : memset;
303         // clear all data, but don't change bucket array length
304         memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
305         +/
306         // just loop over entire slice
307         foreach(ref b; buckets)
308             if(b.entry !is null){
309                 core.stdc.stdlib.free(b.entry);
310                 b.entry = null;
311             }
312         deleted = used = 0;
313         firstUsed = cast(uint) dim;
314         buckets[] = Bucket.init;
315     }
316 
317     void free() @nogc nothrow {
318         foreach(ref b; buckets)
319             if(b.entry !is null){
320                 core.stdc.stdlib.free(b.entry);
321                 b.entry = null;
322             }
323                 
324         core.stdc.stdlib.free(buckets.ptr);
325         deleted = used = 0;
326         buckets = null;
327     }
328 
329     // TODO: .byKey(), .byValue(), .byKeyValue()
330 }
331 
332 private size_t nextpow2(scope const size_t n) pure nothrow @nogc {
333     import core.bitop : bsr;
334 
335     if (!n)
336         return 1;
337 
338     const isPowerOf2 = !((n - 1) & n);
339     return 1 << (bsr(n) + !isPowerOf2);
340 }
341 
342 private size_t mix(size_t h) @safe pure nothrow @nogc {
343     enum m = 0x5bd1e995;
344     h ^= h >> 13;
345     h *= m;
346     h ^= h >> 15;
347     return h;
348 }
349 
350 private T min(T)(scope const T a, scope const T b) pure nothrow @nogc {
351     return a < b ? a : b;
352 }
353 
354 private T max(T)(scope const T a, scope const T b) pure nothrow @nogc {
355     return b < a ? a : b;
356 }
357 
358 unittest {
359     import core.stdc.stdio;
360     import core.stdc.time;
361 
362     clock_t begin = clock();
363 
364     Bcaa!(int, int) aa0;
365 
366     foreach (i; 0..1000000){
367         aa0[i] = i;
368     }
369 
370     foreach (i; 2000..1000000){
371         aa0.remove(i);
372     }
373 
374     printf("%d \n", aa0[1000]);
375     aa0.free;
376 
377     clock_t end = clock(); printf("Elapsed time: %f \n", cast(double)(end - begin) / CLOCKS_PER_SEC);
378 
379     Bcaa!(string, string) aa1;
380 
381     aa1["Stevie"] = "Ray Vaughan";
382     aa1["Asım Can"] = "Gündüz";
383     aa1["Dan"] = "Patlansky";
384     aa1["İlter"] = "Kurcala";
385     aa1["Ferhat"] = "Kurtulmuş";
386 
387     if (auto valptr = "Dan" in aa1)
388         printf("%s exists!!!!\n", (*valptr).ptr );
389     else
390         printf("does not exist!!!!\n".ptr);
391 
392     assert(aa1.remove("Ferhat") == true);
393     assert(aa1["Ferhat"] == null);
394     assert(aa1.remove("Foe") == false);
395     assert(aa1["İlter"] =="Kurcala");
396 
397     aa1.rehash();
398 
399     printf("%s\n",aa1["Stevie"].ptr);
400     printf("%s\n",aa1["Asım Can"].ptr);
401     printf("%s\n",aa1["Dan"].ptr);
402     printf("%s\n",aa1["Ferhat"].ptr);
403 
404     auto keys = aa1.keys;
405     foreach(key; keys)
406         printf("%s -> %s \n", key.ptr, aa1[key].ptr);
407     core.stdc.stdlib.free(keys.ptr);
408     aa1.free;
409 
410     struct Guitar {
411         string brand;
412     }
413 
414     Bcaa!(int, Guitar) guitars;
415 
416     guitars[0] = Guitar("Fender");
417     guitars[3] = Guitar("Gibson");
418     guitars[356] = Guitar("Stagg");
419 
420     assert(guitars[3].brand == "Gibson");
421 
422     printf("%s \n", guitars[356].brand.ptr);
423 
424     if(auto valPtr = 3 in guitars)
425         printf("%s \n", (*valPtr).brand.ptr);
426 
427     guitars.free;
428 }