Skip to content

Commit a93193c

Browse files
committed
hash.c: use rb_hash_* functions for objid_hash, double and ident.
Note that ident becomes a bit slower, because the hash value is more "random". The new version calculates a fair hash value, and so provides a statistically fair collision rate. The previous version relied on the internal pattern of symbols and "usually" provided fewer collisions, though it could be compromised easily.
1 parent 347bcb1 commit a93193c

File tree

4 files changed

+22
-52
lines changed

4 files changed

+22
-52
lines changed

hash.c

+19-36
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
175175
}
176176
else if (BUILTIN_TYPE(a) == T_FLOAT) {
177177
flt:
178-
hval = rb_dbl_hash(rb_float_value(a));
179-
hnum = FIX2LONG(hval);
178+
hnum = rb_dbl_hash_long(rb_float_value(a));
180179
}
181180
else {
182181
hnum = other_func(a);
@@ -199,34 +198,29 @@ rb_any_hash(VALUE a)
199198
return any_hash(a, obj_any_hash);
200199
}
201200

202-
static st_index_t
203-
rb_num_hash_start(st_index_t n)
201+
long
202+
rb_dbl_hash_long(double d)
204203
{
205-
/*
206-
* This hash function is lightly-tuned for Ruby. Further tuning
207-
* should be possible. Notes:
208-
*
209-
* - (n >> 3) alone is great for heap objects and OK for fixnum,
210-
* however symbols perform poorly.
211-
* - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
212-
* n.b.: +3 to remove most ID scope, +1 worked well initially, too
213-
* n.b.: +1 (instead of 3) worked well initially, too
214-
* - (n << 16) was finally added to avoid losing bits for fixnums
215-
* - avoid expensive modulo instructions, it is currently only
216-
* shifts and bitmask operations.
217-
*/
218-
return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
204+
long hash;
205+
unsigned i;
206+
#define ind_in_dbl type_roomof(double, st_index_t)
207+
union {
208+
st_index_t i[ind_in_dbl];
209+
double d;
210+
} v = { {0} };
211+
/* normalize -0.0 to 0.0 */
212+
v.d = d == 0.0 ? 0.0 : d;
213+
hash = rb_hash_start(v.i[0]);
214+
for (i = 1; i < ind_in_dbl; i++) {
215+
hash = rb_hash_uint(hash, v.i[i]);
216+
}
217+
return rb_hash_end(hash);
219218
}
220219

221220
long
222221
rb_objid_hash(st_index_t index)
223222
{
224-
st_index_t hnum = rb_num_hash_start(index);
225-
226-
hnum = rb_hash_start(hnum);
227-
hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
228-
hnum = rb_hash_end(hnum);
229-
return hnum;
223+
return rb_hash_end(index);
230224
}
231225

232226
static st_index_t
@@ -258,18 +252,7 @@ static const struct st_hash_type objhash = {
258252
static st_index_t
259253
rb_ident_hash(st_data_t n)
260254
{
261-
#ifdef USE_FLONUM /* RUBY */
262-
/*
263-
* - flonum (on 64-bit) is pathologically bad, mix the actual
264-
* float value in, but do not use the float value as-is since
265-
* many integers get interpreted as 2.0 or -2.0 [Bug #10761]
266-
*/
267-
if (FLONUM_P(n)) {
268-
n ^= (st_data_t)rb_float_value(n);
269-
}
270-
#endif
271-
272-
return (st_index_t)rb_num_hash_start((st_index_t)n);
255+
return rb_hash_end((st_index_t)n);
273256
}
274257

275258
static const struct st_hash_type identhash = {

internal.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,6 @@ struct st_table *rb_hash_tbl_raw(VALUE hash);
10801080
VALUE rb_hash_has_key(VALUE hash, VALUE key);
10811081
VALUE rb_hash_default_value(VALUE hash, VALUE key);
10821082
VALUE rb_hash_set_default_proc(VALUE hash, VALUE proc);
1083-
long rb_objid_hash(st_index_t index);
10841083
st_table *rb_init_identtable(void);
10851084
st_table *rb_init_identtable_with_size(st_index_t size);
10861085

@@ -1147,6 +1146,7 @@ VALUE rb_int_modulo(VALUE x, VALUE y);
11471146
VALUE rb_int_round(VALUE num, int ndigits);
11481147
VALUE rb_int2str(VALUE num, int base);
11491148
VALUE rb_dbl_hash(double d);
1149+
long rb_dbl_hash_long(double d);
11501150
VALUE rb_fix_plus(VALUE x, VALUE y);
11511151
VALUE rb_int_ge(VALUE x, VALUE y);
11521152

numeric.c

+1-6
Original file line numberDiff line numberDiff line change
@@ -1350,12 +1350,7 @@ flo_hash(VALUE num)
13501350
VALUE
13511351
rb_dbl_hash(double d)
13521352
{
1353-
st_index_t hash;
1354-
1355-
/* normalize -0.0 to 0.0 */
1356-
if (d == 0.0) d = 0.0;
1357-
hash = rb_memhash(&d, sizeof(d));
1358-
return LONG2FIX(hash);
1353+
return LONG2FIX(rb_dbl_hash_long(d));
13591354
}
13601355

13611356
VALUE

object.c

+1-9
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,7 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
162162
VALUE
163163
rb_obj_hash(VALUE obj)
164164
{
165-
VALUE oid = rb_obj_id(obj);
166-
#if SIZEOF_LONG == SIZEOF_VOIDP
167-
st_index_t index = NUM2LONG(oid);
168-
#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
169-
st_index_t index = NUM2LL(oid);
170-
#else
171-
# error not supported
172-
#endif
173-
return LONG2FIX(rb_objid_hash(index));
165+
/* stub for documentation */
174166
}
175167
#else
176168
VALUE rb_obj_hash(VALUE obj);

0 commit comments

Comments
 (0)