Skip to content

Commit 40ddf54

Browse files
authored
flambda-backend: Runtime helpers for 128-bit vectors (#1897)
1 parent a336b70 commit 40ddf54

File tree

3 files changed

+88
-2
lines changed

3 files changed

+88
-2
lines changed

runtime/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ BYTECODE_C_SOURCES := $(addsuffix .c, \
2323
interp misc stacks fix_code startup_aux startup_byt freelist major_gc \
2424
minor_gc memory alloc roots_byt globroots fail_byt signals \
2525
signals_byt printexc backtrace_byt backtrace compare ints eventlog \
26-
floats str array io extern intern hash sys meta parsing gc_ctrl md5 obj \
26+
floats simd str array io extern intern hash sys meta parsing gc_ctrl md5 obj \
2727
lexing callback debugger weak compact finalise custom dynlink \
2828
afl $(UNIX_OR_WIN32) bigarray main memprof domain \
2929
skiplist codefrag)
3030

3131
NATIVE_C_SOURCES := $(addsuffix .c, \
3232
startup_aux startup_nat main fail_nat roots_nat signals \
3333
signals_nat misc freelist major_gc minor_gc memory alloc compare ints \
34-
floats str array io extern intern hash sys parsing gc_ctrl eventlog md5 obj \
34+
floats simd str array io extern intern hash sys parsing gc_ctrl eventlog md5 obj \
3535
lexing $(UNIX_OR_WIN32) printexc callback weak compact finalise custom \
3636
globroots backtrace_nat backtrace dynlink_nat debugger meta \
3737
dynlink clambda_checks afl bigarray \

runtime/caml/simd.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**************************************************************************/
2+
/* */
3+
/* OCaml */
4+
/* */
5+
/* Max Slater, Jane Street */
6+
/* */
7+
/* Copyright 2023 Jane Street Group LLC */
8+
/* */
9+
/* All rights reserved. This file is distributed under the terms of */
10+
/* the GNU Lesser General Public License version 2.1, with the */
11+
/* special exception on linking described in the file LICENSE. */
12+
/* */
13+
/**************************************************************************/
14+
15+
/* SIMD vector instruction support */
16+
17+
#ifndef CAML_SIMD_H
18+
#define CAML_SIMD_H
19+
20+
#include "mlvalues.h"
21+
22+
/* ARCH_SSE2 is defined when the compiler is generating code for a target
   on which SSE2 instructions may be used.

   - GCC/Clang advertise this through __SSE2__ (and the later __SSE3__,
     __SSSE3__, __SSE4_1__, __SSE4_2__ macros).
   - MSVC defines _M_IX86_FP on *every* 32-bit x86 build, with value 0, 1
     or 2 depending on /arch; only the value 2 means SSE2 is enabled, so
     the value must be tested rather than mere definedness.
   - MSVC does not define _M_IX86_FP on x64; SSE2 is part of the x86-64
     baseline there, signalled by _M_X64 / _M_AMD64. */
#if defined(__SSE2__) || defined(__SSE3__) || defined(__SSSE3__) || \
    defined(__SSE4_1__) || defined(__SSE4_2__) || \
    defined(_M_X64) || defined(_M_AMD64) || \
    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#define ARCH_SSE2
#endif
26+
27+
/* ARCH_AVX: defined when the compiler reports AVX or AVX2 support
   through its predefined macros. */
#if defined(__AVX2__) || defined(__AVX__)
#  define ARCH_AVX
#endif
30+
31+
#ifdef ARCH_SSE2
#include <emmintrin.h>

/* Accessors for 128-bit vectors stored in an OCaml block.

   Vec128_val / Vec128_vald / Vec128_vali read the 16 bytes starting at
   Bp_val(v) as, respectively, four floats (__m128), two doubles (__m128d)
   or an integer vector (__m128i).  The unaligned load intrinsics are used,
   so no particular alignment of the block is required. */
#define Vec128_val(v) _mm_loadu_ps((const float*)Bp_val(v))
#define Vec128_vald(v) _mm_loadu_pd((const double*)Bp_val(v))
#define Vec128_vali(v) _mm_loadu_si128((const __m128i*)Bp_val(v))

/* Store_vec128_val* write the vector x into the 16 bytes starting at
   Bp_val(v), again with unaligned stores.  The vector argument is
   parenthesised so that any expression can be passed safely. */
#define Store_vec128_val(v,x) _mm_storeu_ps((float*)Bp_val(v), (x))
#define Store_vec128_vald(v,x) _mm_storeu_pd((double*)Bp_val(v), (x))
#define Store_vec128_vali(v,x) _mm_storeu_si128((__m128i*)Bp_val(v), (x))

/* Each function returns a freshly allocated block tagged Abstract_tag
   that holds a copy of the vector v. */
CAMLextern value caml_copy_vec128(__m128 v);
CAMLextern value caml_copy_vec128i(__m128i v);
CAMLextern value caml_copy_vec128d(__m128d v);
#endif
45+
46+
#endif /* CAML_SIMD_H */

runtime/simd.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**************************************************************************/
2+
/* */
3+
/* OCaml */
4+
/* */
5+
/* Max Slater, Jane Street */
6+
/* */
7+
/* Copyright 2023 Jane Street Group LLC */
8+
/* */
9+
/* All rights reserved. This file is distributed under the terms of */
10+
/* the GNU Lesser General Public License version 2.1, with the */
11+
/* special exception on linking described in the file LICENSE. */
12+
/* */
13+
/**************************************************************************/
14+
15+
#define CAML_INTERNALS
16+
17+
#include "caml/alloc.h"
18+
#include "caml/simd.h"
19+
20+
#ifdef ARCH_SSE2
21+
22+
CAMLexport value caml_copy_vec128(__m128 v) {
23+
value res = caml_alloc_small(2, Abstract_tag);
24+
Store_vec128_val(res, v);
25+
return res;
26+
}
27+
28+
CAMLexport value caml_copy_vec128i(__m128i v) {
29+
value res = caml_alloc_small(2, Abstract_tag);
30+
Store_vec128_vali(res, v);
31+
return res;
32+
}
33+
34+
CAMLexport value caml_copy_vec128d(__m128d v) {
35+
value res = caml_alloc_small(2, Abstract_tag);
36+
Store_vec128_vald(res, v);
37+
return res;
38+
}
39+
40+
#endif

0 commit comments

Comments
 (0)