oxcaml · mshinwell · Jan 4, 2024 · Oct 12, 2023 · Dec 20, 2023 · Dec 21, 2023
diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml
@@ -64,6 +64,7 @@ let mk_load_atomic memory_chunk =
 
 let floatarray_tag dbg = Cconst_int (Obj.double_array_tag, dbg)
 
+(* CR mshinwell: update to use NOT_MARKABLE terminology *)
 let block_header tag sz =
   Nativeint.add
     (Nativeint.shift_left (Nativeint.of_int sz) 10)
@@ -118,6 +119,12 @@ let boxedint64_local_header =
 
 let boxedintnat_local_header = local_block_header Obj.custom_tag 2
 
+let black_custom_header ~size = black_block_header Obj.custom_tag size
+
+let custom_header ~size = block_header Obj.custom_tag size
+
+let custom_local_header ~size = local_block_header Obj.custom_tag size
+
 let caml_nativeint_ops = "caml_nativeint_ops"
 
 let caml_int32_ops = "caml_int32_ops"
@@ -765,8 +772,25 @@ let complex_im c dbg =
 
 let return_unit dbg c = Csequence (c, Cconst_int (1, dbg))
 
-let field_address ptr n dbg =
-  if n = 0 then ptr else Cop (Cadda, [ptr; Cconst_int (n * size_addr, dbg)], dbg)
+let field_address ?(memory_chunk = Word_val) ptr n dbg =
+  if n = 0
+  then ptr
+  else
+    let field_size_in_bytes =
+      match memory_chunk with
+      | Byte_unsigned | Byte_signed -> 1
+      | Sixteen_unsigned | Sixteen_signed -> 2
+      | Thirtytwo_unsigned | Thirtytwo_signed -> 4
+      | Single ->
+        assert (size_float = 8);
+        (* unclear what to do if this is false *)
+        size_float / 2
+      | Word_int -> size_int
+      | Word_val -> size_addr
+      | Double -> size_float
+      | Onetwentyeight_unaligned | Onetwentyeight_aligned -> size_vec128
+    in
+    Cop (Cadda, [ptr; Cconst_int (n * field_size_in_bytes, dbg)], dbg)
 
 let get_field_gen_given_memory_chunk memory_chunk mutability ptr n dbg =
   Cop
@@ -892,6 +916,57 @@ let array_indexing ?typ log2size ptr ofs dbg =
           Cconst_int (-1 lsl (log2size - 1), dbg) ],
         dbg )
 
+(* CR Gbury: this conversion int -> nativeint is potentially unsafe when
+   cross-compiling for 64-bit on a 32-bit host *)
+let int ~dbg i = natint_const_untagged dbg (Nativeint.of_int i)
+
+let custom_ops_unboxed_int32_odd_array =
+  Cconst_symbol (Cmm.global_symbol "_unboxed_int32_odd_array", Debuginfo.none)
+
+let custom_ops_unboxed_int32_even_array =
+  Cconst_symbol (Cmm.global_symbol "_unboxed_int32_even_array", Debuginfo.none)
+
+let custom_ops_unboxed_int64_array =
+  Cconst_symbol (Cmm.global_symbol "_unboxed_int64_array", Debuginfo.none)
+
+let custom_ops_unboxed_nativeint_array =
+  Cconst_symbol (Cmm.global_symbol "_unboxed_nativeint_array", Debuginfo.none)
+
+let unboxed_int32_array_length arr dbg =
+  (* A dynamic test is needed to determine if the array contains an odd or even
+     number of elements *)
+  bind "arr" arr (fun arr ->
+      let custom_ops_var = Backend_var.create_local "custom_ops" in
+      let num_words_var = Backend_var.create_local "num_words" in
+      Clet
+        ( VP.create num_words_var,
+          (* need to subtract so as not to count the custom_operations field *)
+          sub_int (get_size arr dbg) (int ~dbg 1) dbg,
+          Clet
+            ( VP.create custom_ops_var,
+              Cop (mk_load_immut Word_int, [arr], dbg),
+              Cifthenelse
+                ( Cop
+                    ( Ccmpa Ceq,
+                      [Cvar custom_ops_var; custom_ops_unboxed_int32_odd_array],
+                      dbg ),
+                  dbg,
+                  (* unboxed int32 odd *)
+                  (sub_int
+                     (mul_int (Cvar num_words_var) (int ~dbg 2) dbg)
+                     (int ~dbg 1))
+                    dbg,
+                  dbg,
+                  (* assumed to be unboxed int32 even *)
+                  mul_int (Cvar num_words_var) (int ~dbg 2) dbg,
+                  dbg,
+                  Any ) ) ))
+
+let unboxed_int64_or_nativeint_array_length arr dbg =
+  bind "arr" arr (fun arr ->
+      (* need to subtract so as not to count the custom_operations field *)
+      sub_int (get_size arr dbg) (int ~dbg 1) dbg)
+
 let addr_array_ref arr ofs dbg =
   Cop (mk_load_mut Word_val, [array_indexing log2_size_addr arr ofs dbg], dbg)
 
@@ -968,6 +1043,86 @@ let addr_array_initialize arr ofs newval dbg =
       [array_indexing log2_size_addr arr ofs dbg; newval],
       dbg )
 
+(* low_32 x is a value which agrees with x on at least the low 32 bits *)
+let rec low_32 dbg = function
+  (* Ignore sign and zero extensions, which do not affect the low bits *)
+  | Cop (Casr, [Cop (Clsl, [x; Cconst_int (32, _)], _); Cconst_int (32, _)], _)
+  | Cop (Cand, [x; Cconst_natint (0xFFFFFFFFn, _)], _) ->
+    low_32 dbg x
+  | Clet (id, e, body) -> Clet (id, e, low_32 dbg body)
+  | x -> x
+
+(* sign_extend_32 sign-extends values from 32 bits to the word size. *)
+let sign_extend_32 dbg e =
+  match low_32 dbg e with
+  | Cop
+      ( Cload
+          { memory_chunk = Thirtytwo_unsigned | Thirtytwo_signed;
+            mutability;
+            is_atomic
+          },
+        args,
+        dbg ) ->
+    Cop
+      ( Cload { memory_chunk = Thirtytwo_signed; mutability; is_atomic },
+        args,
+        dbg )
+  | e ->
+    Cop
+      ( Casr,
+        [Cop (Clsl, [e; Cconst_int (32, dbg)], dbg); Cconst_int (32, dbg)],
+        dbg )
+
+let unboxed_int32_array_ref arr index dbg =
+  bind "arr" arr (fun arr ->
+      bind "index" index (fun index ->
+          let index =
+            (* Need to skip the custom_operations field. We add 2 not 1 since
+               the call to [array_indexing], below, is in terms of 32-bit
+               words. *)
+            add_int index (int ~dbg 2) dbg
+          in
+          let log2_size_addr = 2 in
+          (* N.B. The resulting value will be sign extended by the code
+             generated for a [Thirtytwo_signed] load. *)
+          Cop
+            ( mk_load_mut Thirtytwo_signed,
+              [array_indexing log2_size_addr arr index dbg],
+              dbg )))
+
+let unboxed_int64_or_nativeint_array_ref arr index dbg =
+  bind "arr" arr (fun arr ->
+      bind "index" index (fun index ->
+          let index =
+            (* Need to skip the custom_operations field *)
+            add_int index (int ~dbg 1) dbg
+          in
+          int_array_ref arr index dbg))
+
+let unboxed_int32_array_set arr ~index ~new_value dbg =
+  bind "arr" arr (fun arr ->
+      bind "index" index (fun index ->
+          bind "new_value" new_value (fun new_value ->
+              let index =
+                (* See comment in [unboxed_int32_array_ref]. *)
+                add_int index (int ~dbg 2) dbg
+              in
+              let log2_size_addr = 2 in
+              Cop
+                ( Cstore (Thirtytwo_signed, Assignment),
+                  [array_indexing log2_size_addr arr index dbg; new_value],
+                  dbg ))))
+
+let unboxed_int64_or_nativeint_array_set arr ~index ~new_value dbg =
+  bind "arr" arr (fun arr ->
+      bind "index" index (fun index ->
+          bind "new_value" new_value (fun new_value ->
+              let index =
+                (* See comment in [unboxed_int64_or_nativeint_array_ref]. *)
+                add_int index (int ~dbg 1) dbg
+              in
+              int_array_set arr index new_value dbg)))
+
 (* Get the field of a block given a possibly inconstant index *)
 
 let get_field_computed imm_or_ptr mutability ~block ~index dbg =
@@ -1311,15 +1466,6 @@ let check_64_bit_target func =
     Misc.fatal_errorf
       "Cmm helpers function %s can only be used on 64-bit targets" func
 
-(* low_32 x is a value which agrees with x on at least the low 32 bits *)
-let rec low_32 dbg = function
-  (* Ignore sign and zero extensions, which do not affect the low bits *)
-  | Cop (Casr, [Cop (Clsl, [x; Cconst_int (32, _)], _); Cconst_int (32, _)], _)
-  | Cop (Cand, [x; Cconst_natint (0xFFFFFFFFn, _)], _) ->
-    low_32 dbg x
-  | Clet (id, e, body) -> Clet (id, e, low_32 dbg body)
-  | x -> x
-
 (* Like [low_32] but for 63-bit integers held in 64-bit registers. *)
 (* CR gbury: Why not use Cmm.map_tail here ? It seems designed for that kind of
    thing (and covers more cases than just Clet). *)
@@ -1333,27 +1479,6 @@ let rec low_63 dbg e =
   | Clet (id, x, body) -> Clet (id, x, low_63 dbg body)
   | _ -> e
 
-(* sign_extend_32 sign-extends values from 32 bits to the word size. *)
-let sign_extend_32 dbg e =
-  match low_32 dbg e with
-  | Cop
-      ( Cload
-          { memory_chunk = Thirtytwo_unsigned | Thirtytwo_signed;
-            mutability;
-            is_atomic
-          },
-        args,
-        dbg ) ->
-    Cop
-      ( Cload { memory_chunk = Thirtytwo_signed; mutability; is_atomic },
-        args,
-        dbg )
-  | e ->
-    Cop
-      ( Casr,
-        [Cop (Clsl, [e; Cconst_int (32, dbg)], dbg); Cconst_int (32, dbg)],
-        dbg )
-
 (* CR-someday mshinwell/gbury: sign_extend_63 then tag_int should simplify to
    just tag_int. Similarly, untag_int then sign_extend_63 should simplify to
    untag_int. *)
@@ -2756,8 +2881,13 @@ let arraylength kind arg dbg =
     Cop (Cor, [len; Cconst_int (1, dbg)], dbg)
   | Paddrarray | Pintarray ->
     Cop (Cor, [addr_array_length_shifted hdr dbg; Cconst_int (1, dbg)], dbg)
-  | Pfloatarray ->
+  | Pfloatarray | Punboxedfloatarray ->
+    (* Note: we only support 64 bit targets now, so this is ok for
+       Punboxedfloatarray *)
     Cop (Cor, [float_array_length_shifted hdr dbg; Cconst_int (1, dbg)], dbg)
+  | Punboxedintarray Pint64 | Punboxedintarray Pnativeint ->
+    unboxed_int64_or_nativeint_array_length arg dbg
+  | Punboxedintarray Pint32 -> unboxed_int32_array_length arg dbg
 
 (* CR-soon gyorsh: effects and coeffects for primitives are set conservatively
    to Arbitrary_effects and Has_coeffects, resp. Check if this can be improved
@@ -3210,10 +3340,6 @@ let symbol ~dbg sym = Cconst_symbol (sym, dbg)
 
 let float ~dbg f = Cconst_float (f, dbg)
 
-(* CR Gbury: this conversion int -> nativeint is potentially unsafe when
-   cross-compiling for 64-bit on a 32-bit host *)
-let int ~dbg i = natint_const_untagged dbg (Nativeint.of_int i)
-
 let int32 ~dbg i = natint_const_untagged dbg (Nativeint.of_int32 i)
 
 (* CR Gbury: this conversion int64 -> nativeint is potentially unsafe when
@@ -3601,3 +3727,67 @@ let atomic_compare_and_set ~dbg atomic ~old_value ~new_value =
         },
       [atomic; old_value; new_value],
       dbg )
+
+type even_or_odd =
+  | Even
+  | Odd
+
+let make_unboxed_int32_array_payload dbg unboxed_int32_list =
+  (* CR mshinwell/gbury: potential big-endian implementations:
+   *
+   *  let i =
+   *    if big_endian
+   *    then Cop (Clsl, [a; Cconst_int (32, dbg)], dbg)
+   *    else a
+   *  in
+   *   ...
+   *  let i =
+   *    if big_endian
+   *    then Cop (Cor, [Cop (Clsl, [a; Cconst_int (32, dbg)], dbg); b], dbg)
+   *    else Cop (Cor, [a; Cop (Clsl, [b; Cconst_int (32, dbg)], dbg)], dbg)
+   *  in
+   *)
+  if Sys.big_endian
+  then
+    Misc.fatal_error "Big-endian platforms not yet supported for unboxed arrays";
+  let rec aux acc = function
+    | [] -> Even, List.rev acc
+    | a :: [] -> Odd, List.rev (a :: acc)
+    | a :: b :: r ->
+      let i = Cop (Cor, [a; Cop (Clsl, [b; Cconst_int (32, dbg)], dbg)], dbg) in
+      aux (i :: acc) r
+  in
+  aux [] unboxed_int32_list
+
+let allocate_unboxed_int32_array ~elements (mode : Lambda.alloc_mode) dbg =
+  let num_elts, payload = make_unboxed_int32_array_payload dbg elements in
+  let header =
+    let size = 1 (* custom_ops field *) + List.length payload in
+    match mode with
+    | Alloc_heap -> custom_header ~size
+    | Alloc_local -> custom_local_header ~size
+  in
+  let custom_ops =
+    (* For odd-length unboxed int32 arrays there are 32 bits spare at the end of
+       the block, which are never read. *)
+    match num_elts with
+    | Even -> custom_ops_unboxed_int32_even_array
+    | Odd -> custom_ops_unboxed_int32_odd_array
+  in
+  Cop (Calloc mode, Cconst_natint (header, dbg) :: custom_ops :: payload, dbg)
+
+let allocate_unboxed_int64_or_nativeint_array custom_ops ~elements
+    (mode : Lambda.alloc_mode) dbg =
+  let header =
+    let size = 1 (* custom_ops field *) + List.length elements in
+    match mode with
+    | Alloc_heap -> custom_header ~size
+    | Alloc_local -> custom_local_header ~size
+  in
+  Cop (Calloc mode, Cconst_natint (header, dbg) :: custom_ops :: elements, dbg)
+
+let allocate_unboxed_int64_array =
+  allocate_unboxed_int64_or_nativeint_array custom_ops_unboxed_int64_array
+
+let allocate_unboxed_nativeint_array =
+  allocate_unboxed_int64_or_nativeint_array custom_ops_unboxed_nativeint_array