Skip to content

Commit 135b96f

Browse files
ncik-robertsxclerc
andauthored
flambda-backend: Hash of mixed blocks raises an exception (#2429)
* caml_hash -> caml_hash_exn * Make hash raise * fix * make fmt * Update ocaml/stdlib/char.ml Co-authored-by: Xavier Clerc <[email protected]> * make bootstrap --------- Co-authored-by: Xavier Clerc <[email protected]>
1 parent ecbe37a commit 135b96f

File tree

18 files changed

+93
-48
lines changed

18 files changed

+93
-48
lines changed

boot/ocamlc

1013 Bytes
Binary file not shown.

boot/ocamllex

48 Bytes
Binary file not shown.

runtime/hash.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "caml/custom.h"
2525
#include "caml/memory.h"
2626
#include "caml/hash.h"
27+
#include "caml/fail.h"
2728

2829
/* The implementation based on MurmurHash 3,
2930
https://github.com/aappleby/smhasher/ */
@@ -181,7 +182,14 @@ CAMLexport uint32_t caml_hash_mix_string(uint32_t h, value s)
181182

182183
/* The generic hash function */
183184

184-
CAMLprim value caml_hash(value count, value limit, value seed, value obj)
185+
/* Internally to Jane Street, we have renamed [caml_hash] to [caml_hash_exn]
186+
to suggest that calling it could raise. (E.g. it raises on mixed blocks.)
187+
As such, we've removed [@@noalloc] from the OCaml [external] that references
188+
this C binding, and would likewise need to remove [@@noalloc] from any
189+
other [external] formerly bound to [caml_hash].
190+
*/
191+
192+
CAMLprim value caml_hash_exn(value count, value limit, value seed, value obj)
185193
{
186194
value queue[HASH_QUEUE_SIZE]; /* Queue of values to examine */
187195
intnat rd; /* Position of first value in queue */
@@ -279,20 +287,16 @@ CAMLprim value caml_hash(value count, value limit, value seed, value obj)
279287
break;
280288

281289
default:
290+
if (Is_mixed_block_reserved(Reserved_val(v))) {
291+
caml_invalid_argument("hash: mixed block value");
292+
}
282293
/* Mix in the tag and size, but do not count this towards [num] */
283294
h = caml_hash_mix_uint32(h, Cleanhd_hd(Hd_val(v)));
284295
/* Copy fields into queue, not exceeding the total size [sz] */
285-
for (i = 0, len = Scannable_wosize_val(v); i < len; i++) {
296+
for (i = 0, len = Wosize_val(v); i < len; i++) {
286297
if (wr >= sz) break;
287298
queue[wr++] = Field(v, i);
288299
}
289-
290-
/* We don't attempt to hash the flat suffix of a mixed block.
291-
This is consistent with abstract blocks which, like mixed
292-
blocks, cause polymorphic comparison to raise and don't
293-
attempt to hash the non-scannable portion.
294-
*/
295-
296300
break;
297301
}
298302
}

runtime4/gc_ctrl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ static intnat norm_minsize (intnat s)
409409
if (s > Minor_heap_max) s = Minor_heap_max;
410410
/* PR#9128 : Make sure the minor heap occupies an integral number of
411411
pages, so that no page contains both bytecode and OCaml
412-
values. This would confuse, e.g., caml_hash. */
412+
values. This would confuse, e.g., caml_hash_exn. */
413413
s = (s + page_wsize - 1) / page_wsize * page_wsize;
414414
return s;
415415
}

runtime4/hash.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "caml/custom.h"
2525
#include "caml/memory.h"
2626
#include "caml/hash.h"
27+
#include "caml/fail.h"
2728

2829
/* The implementation based on MurmurHash 3,
2930
https://github.com/aappleby/smhasher/ */
@@ -181,7 +182,13 @@ CAMLexport uint32_t caml_hash_mix_string(uint32_t h, value s)
181182

182183
/* The generic hash function */
183184

184-
CAMLprim value caml_hash(value count, value limit, value seed, value obj)
185+
/* Internally to Jane Street, we have renamed [caml_hash] to [caml_hash_exn]
186+
to suggest that calling it could raise. (E.g. it raises on mixed blocks.)
187+
As such, we've removed [@@noalloc] from the OCaml [external] that references
188+
this C binding, and would likewise need to remove [@@noalloc] from any
189+
other [external] formerly bound to [caml_hash].
190+
*/
191+
CAMLprim value caml_hash_exn(value count, value limit, value seed, value obj)
185192
{
186193
value queue[HASH_QUEUE_SIZE]; /* Queue of values to examine */
187194
intnat rd; /* Position of first value in queue */
@@ -283,20 +290,16 @@ CAMLprim value caml_hash(value count, value limit, value seed, value obj)
283290
}
284291
#endif
285292
default:
293+
if (Is_mixed_block_reserved(Reserved_val(v))) {
294+
caml_invalid_argument("hash: mixed block value");
295+
}
286296
/* Mix in the tag and size, but do not count this towards [num] */
287297
h = caml_hash_mix_uint32(h, Whitehd_hd(Hd_val(v)));
288298
/* Copy fields into queue, not exceeding the total size [sz] */
289-
for (i = 0, len = Scannable_wosize_val(v); i < len; i++) {
299+
for (i = 0, len = Wosize_val(v); i < len; i++) {
290300
if (wr >= sz) break;
291301
queue[wr++] = Field(v, i);
292302
}
293-
294-
/* We don't attempt to hash the flat suffix of a mixed block.
295-
This is consistent with abstract blocks which, like mixed
296-
blocks, cause polymorphic comparison to raise and don't
297-
attempt to hash the non-scannable portion.
298-
*/
299-
300303
break;
301304
}
302305
}

stdlib/bool.ml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@ let of_string = function
3737

3838
let to_string = function false -> "false" | true -> "true"
3939

40+
(* [caml_hash_exn] doesn't raise on booleans, so it's safe for
41+
it to be marked as [@@noalloc].
42+
*)
4043
external seeded_hash_param :
41-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
44+
int -> int -> int -> bool -> int = "caml_hash_exn" [@@noalloc]
45+
4246
let seeded_hash seed x = seeded_hash_param 10 100 seed x
4347
let hash x = seeded_hash_param 10 100 0 x

stdlib/char.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@ type t = char
6464
let compare c1 c2 = code c1 - code c2
6565
let equal (c1: t) (c2: t) = compare c1 c2 = 0
6666

67+
(* [caml_hash_exn] doesn't raise on chars, so it's safe for
68+
it to be marked as [@@noalloc].
69+
*)
6770
external seeded_hash_param :
68-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
71+
int -> int -> int -> char -> int = "caml_hash_exn" [@@noalloc]
6972
let seeded_hash seed x = seeded_hash_param 10 100 seed x
7073
let hash x = seeded_hash_param 10 100 0 x

stdlib/float.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,11 @@ let[@inline] min_max_num (x: float) (y: float) =
171171
else if is_nan y then (x,x)
172172
else if y > x || (not(sign_bit y) && sign_bit x) then (x,y) else (y,x)
173173

174+
(* [caml_hash_exn] doesn't raise on floats, so it's safe for
175+
it to be marked as [@@noalloc].
176+
*)
174177
external seeded_hash_param :
175-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
178+
int -> int -> int -> float -> int = "caml_hash_exn" [@@noalloc]
176179
let seeded_hash seed x = seeded_hash_param 10 100 seed x
177180
let hash x = seeded_hash_param 10 100 0 x
178181

stdlib/hashtbl.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ module Make(H: HashedType): (S with type key = H.t) =
501501
use - see #2202 *)
502502

503503
external seeded_hash_param :
504-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
504+
int -> int -> int -> 'a -> int = "caml_hash_exn"
505505

506506
let hash x = seeded_hash_param 10 100 0 x
507507
let hash_param n1 n2 x = seeded_hash_param n1 n2 0 x

stdlib/int.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ let of_string s = try Some (int_of_string s) with Failure _ -> None
5656
external format_int : string -> int -> string = "caml_format_int"
5757
let to_string x = format_int "%d" x
5858

59+
(* [caml_hash_exn] doesn't raise on ints, so it's safe for
60+
it to be marked as [@@noalloc].
61+
*)
5962
external seeded_hash_param :
60-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
63+
int -> int -> int -> int -> int = "caml_hash_exn" [@@noalloc]
6164
let seeded_hash seed x = seeded_hash_param 10 100 seed x
6265
let hash x = seeded_hash_param 10 100 0 x

stdlib/int32.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ let[@inline available] unsigned_div n d =
109109
let[@inline available] unsigned_rem n d =
110110
sub n (mul ((unsigned_div[@inlined]) n d) d)
111111

112+
(* [caml_hash_exn] doesn't raise on int32s, so it's safe for
113+
it to be marked as [@@noalloc].
114+
*)
112115
external seeded_hash_param :
113-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
116+
int -> int -> int -> int32 -> int = "caml_hash_exn" [@@noalloc]
114117
let seeded_hash seed x = seeded_hash_param 10 100 seed x
115118
let hash x = seeded_hash_param 10 100 0 x

stdlib/int64.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,10 @@ let[@inline available] unsigned_div n d =
108108
let[@inline available] unsigned_rem n d =
109109
sub n (mul ((unsigned_div[@inlined]) n d) d)
110110

111+
(* [caml_hash_exn] doesn't raise on int64s, so it's safe for
112+
it to be marked as [@@noalloc].
113+
*)
111114
external seeded_hash_param :
112-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
115+
int -> int -> int -> int64 -> int = "caml_hash_exn" [@@noalloc]
113116
let seeded_hash seed x = seeded_hash_param 10 100 seed x
114117
let hash x = seeded_hash_param 10 100 0 x

stdlib/nativeint.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,10 @@ let[@inline available] unsigned_div n d =
9898
let[@inline available] unsigned_rem n d =
9999
sub n (mul ((unsigned_div[@inlined]) n d) d)
100100

101+
(* [caml_hash_exn] doesn't raise on nativeints, so it's safe for
102+
it to be marked as [@@noalloc].
103+
*)
101104
external seeded_hash_param :
102-
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
105+
int -> int -> int -> nativeint -> int = "caml_hash_exn" [@@noalloc]
103106
let seeded_hash seed x = seeded_hash_param 10 100 seed x
104107
let hash x = seeded_hash_param 10 100 0 x

testsuite/tests/mixed-blocks/generate_mixed_blocks_code.ml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,6 @@ let copy_via_tag x =
334334
line {|let oc = Out_channel.open_bin "/dev/null"|};
335335
line {|exception Unexpected_success|};
336336
line {|type forget = T : _ -> forget|};
337-
line {|let try_hash x = ignore (Hashtbl.hash x : int)|};
338337
if not bytecode
339338
then
340339
line
@@ -344,6 +343,9 @@ let expect_failure f =
344343
| Unexpected_success -> assert false
345344
| _ -> ()
346345

346+
let try_hash x =
347+
expect_failure (fun () -> ignore (Hashtbl.hash x : int))
348+
347349
let try_compare x y =
348350
expect_failure (fun () -> ignore (compare (T x) (T y) : int));
349351
expect_failure (fun () -> ignore ((T x) = (T y) : bool))
@@ -356,6 +358,9 @@ let try_marshal t =
356358
*)
357359
line
358360
{|
361+
let try_hash x =
362+
ignore (Hashtbl.hash x : int)
363+
359364
let try_compare x y =
360365
ignore (compare (T x) (T y) : int);
361366
ignore ((T x) = (T y) : bool)

testsuite/tests/mixed-blocks/generated_byte_test.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ let copy_via_tag x =
3434
let oc = Out_channel.open_bin "/dev/null"
3535
exception Unexpected_success
3636
type forget = T : _ -> forget
37-
let try_hash x = ignore (Hashtbl.hash x : int)
37+
38+
let try_hash x =
39+
ignore (Hashtbl.hash x : int)
3840

3941
let try_compare x y =
4042
ignore (compare (T x) (T y) : int);

testsuite/tests/mixed-blocks/generated_native_test.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@ let copy_via_tag x =
3535
let oc = Out_channel.open_bin "/dev/null"
3636
exception Unexpected_success
3737
type forget = T : _ -> forget
38-
let try_hash x = ignore (Hashtbl.hash x : int)
3938

4039
let expect_failure f =
4140
try f (); raise Unexpected_success with
4241
| Unexpected_success -> assert false
4342
| _ -> ()
4443

44+
let try_hash x =
45+
expect_failure (fun () -> ignore (Hashtbl.hash x : int))
46+
4547
let try_compare x y =
4648
expect_failure (fun () -> ignore (compare (T x) (T y) : int));
4749
expect_failure (fun () -> ignore ((T x) = (T y) : bool))

testsuite/tests/mixed-blocks/hash.ml

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,20 @@
2020

2121
(* Currently bytecode/native hashes of mixed records are different.
2222
Mixed records are represented as mixed blocks in native code and
23-
normal blocks in bytecode. We don't make any special effort to get
24-
their hash values to line up. This is something we could consider
25-
revisiting -- the simplest way to accomplish this may well be to
26-
support mixed blocks in bytecode.
27-
*)
23+
normal blocks in bytecode. Hash raises on mixed blocks (but not
24+
on normal blocks).
25+
26+
We could consider making native hash return a value instead. But,
27+
if it's a different value than bytecode, it's important that users
28+
can't marshal mixed block values from native code to bytecode.
29+
(Otherwise you could marshal a hashtable, breaking its invariants.)
30+
*)
31+
32+
let hash x =
33+
match Hashtbl.hash x with
34+
| exception exn -> Printf.sprintf "raised %s" (Printexc.to_string exn)
35+
| i -> string_of_int i
2836

29-
let hash = Hashtbl.hash
3037
let printf = Printf.printf
3138

3239
let () = printf "All Float Mixed Records\n"
@@ -39,7 +46,7 @@ let () =
3946
}
4047
end in
4148
hash { x = 4.0; y = #5.1 }
42-
|> printf "\t{ x : float; y : float# } = %d\n"
49+
|> printf "\t{ x : float; y : float# } = %s\n"
4350

4451

4552
let () =
@@ -50,7 +57,7 @@ let () =
5057
}
5158
end in
5259
hash { x = #4.0; y = 5.1 }
53-
|> printf "\t{ x : float#; y : float } = %d\n"
60+
|> printf "\t{ x : float#; y : float } = %s\n"
5461

5562
let () = printf "General Mixed Records\n"
5663

@@ -63,7 +70,7 @@ let () =
6370
}
6471
end in
6572
hash { x = "abc"; y = #5.1 }
66-
|> printf "\t{ x : string; y : float# } = %d\n"
73+
|> printf "\t{ x : string; y : float# } = %s\n"
6774

6875
let () =
6976
let open struct
@@ -73,7 +80,7 @@ let () =
7380
}
7481
end in
7582
hash { x = 23940; y = #5.1 }
76-
|> printf "\t{ x : int; y : float# } = %d\n"
83+
|> printf "\t{ x : int; y : float# } = %s\n"
7784

7885
let () =
7986
let open struct
@@ -84,7 +91,7 @@ let () =
8491
}
8592
end in
8693
hash { x = 23940; y = #5.1; z = 1340 }
87-
|> printf "\t{ x : int; y : float#; z : int } = %d\n"
94+
|> printf "\t{ x : int; y : float#; z : int } = %s\n"
8895

8996
let () =
9097
let open struct
@@ -96,4 +103,4 @@ let () =
96103
}
97104
end in
98105
hash { a = "abc"; x = 23940; y = #5.1; z = 1340 }
99-
|> printf "\t{ a : string; x : int; y : float#; z : int } = %d\n"
106+
|> printf "\t{ a : string; x : int; y : float#; z : int } = %s\n"
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
All Float Mixed Records
2-
{ x : float; y : float# } = 980550003
3-
{ x : float#; y : float } = 980550003
2+
{ x : float; y : float# } = raised Invalid_argument("hash: mixed block value")
3+
{ x : float#; y : float } = raised Invalid_argument("hash: mixed block value")
44
General Mixed Records
5-
{ x : string; y : float# } = 883581425
6-
{ x : int; y : float# } = 980550003
7-
{ x : int; y : float#; z : int } = 531159882
8-
{ a : string; x : int; y : float#; z : int } = 677901263
5+
{ x : string; y : float# } = raised Invalid_argument("hash: mixed block value")
6+
{ x : int; y : float# } = raised Invalid_argument("hash: mixed block value")
7+
{ x : int; y : float#; z : int } = raised Invalid_argument("hash: mixed block value")
8+
{ a : string; x : int; y : float#; z : int } = raised Invalid_argument("hash: mixed block value")

0 commit comments

Comments
 (0)