Skip to content

Commit dbc5758

Browse files
committed
auto merge of #6724 : thestinger/rust/swap_fast, r=thestinger
Passing higher alignment values gives the optimization passes more freedom, since they can copy in larger chunks. This change results in rustc outputting the same post-optimization IR as clang for swaps and most copies, excluding the lack of information about padding.

Code snippet:

```rust
#[inline(never)]
fn swap<T>(x: &mut T, y: &mut T) {
    util::swap(x, y);
}
```

Original IR (for `int`):

```llvm
define internal fastcc void @_ZN9swap_283417_a71830ca3ed2d65d3_00E(i64*, i64*) #1 {
static_allocas:
  %2 = icmp eq i64* %0, %1
  br i1 %2, label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit, label %3

; <label>:3                                       ; preds = %static_allocas
  %4 = load i64* %0, align 1
  %5 = load i64* %1, align 1
  store i64 %5, i64* %0, align 1
  store i64 %4, i64* %1, align 1
  br label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit

_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit:  ; preds = %3, %static_allocas
  ret void
}
```

After #6710:

```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
  %2 = load i64* %0, align 1
  %3 = load i64* %1, align 1
  store i64 %3, i64* %0, align 1
  store i64 %2, i64* %1, align 1
  ret void
}
```

After this change:

```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
  %2 = load i64* %0, align 8
  %3 = load i64* %1, align 8
  store i64 %3, i64* %0, align 8
  store i64 %2, i64* %1, align 8
  ret void
}
```

Another example:

```rust
#[inline(never)]
fn set<T>(x: &mut T, y: T) {
    *x = y;
}
```

Before, with `(int, int)` (align 1):

```llvm
define internal fastcc void @_ZN8set_282517_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
  %2 = bitcast { i64, i64 }* %1 to i8*
  %3 = bitcast { i64, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 1, i1 false)
  ret void
}
```

After, with `(int, int)` (align 8):

```llvm
define internal fastcc void @_ZN8set_282617_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
  %2 = bitcast { i64, i64 }* %1 to i8*
  %3 = bitcast { i64, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
  ret void
}
```
2 parents 5d04ee8 + e6c04de commit dbc5758

File tree

9 files changed

+231
-80
lines changed

9 files changed

+231
-80
lines changed

src/librustc/middle/trans/base.rs

+7-10
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use middle::trans::foreign;
5252
use middle::trans::glue;
5353
use middle::trans::inline;
5454
use middle::trans::machine;
55-
use middle::trans::machine::llsize_of;
55+
use middle::trans::machine::{llalign_of_min, llsize_of};
5656
use middle::trans::meth;
5757
use middle::trans::monomorphize;
5858
use middle::trans::reachable;
@@ -1442,12 +1442,7 @@ pub fn with_cond(bcx: block, val: ValueRef, f: &fn(block) -> block) -> block {
14421442
next_cx
14431443
}
14441444

1445-
pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
1446-
n_bytes: ValueRef) {
1447-
// FIXME (Related to #1645, I think?): Provide LLVM with better
1448-
// alignment information when the alignment is statically known (it must
1449-
// be nothing more than a constant int, or LLVM complains -- not even a
1450-
// constant element of a tydesc works).
1445+
pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef, align: u32) {
14511446
let _icx = cx.insn_ctxt("call_memcpy");
14521447
let ccx = cx.ccx();
14531448
let key = match ccx.sess.targ_cfg.arch {
@@ -1462,7 +1457,7 @@ pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
14621457
let src_ptr = PointerCast(cx, src, T_ptr(T_i8()));
14631458
let dst_ptr = PointerCast(cx, dst, T_ptr(T_i8()));
14641459
let size = IntCast(cx, n_bytes, ccx.int_type);
1465-
let align = C_i32(1i32);
1460+
let align = C_i32(align as i32);
14661461
let volatile = C_i1(false);
14671462
Call(cx, memcpy, [dst_ptr, src_ptr, size, align, volatile]);
14681463
}
@@ -1471,8 +1466,10 @@ pub fn memcpy_ty(bcx: block, dst: ValueRef, src: ValueRef, t: ty::t) {
14711466
let _icx = bcx.insn_ctxt("memcpy_ty");
14721467
let ccx = bcx.ccx();
14731468
if ty::type_is_structural(t) {
1474-
let llsz = llsize_of(ccx, type_of::type_of(ccx, t));
1475-
call_memcpy(bcx, dst, src, llsz);
1469+
let llty = type_of::type_of(ccx, t);
1470+
let llsz = llsize_of(ccx, llty);
1471+
let llalign = llalign_of_min(ccx, llty);
1472+
call_memcpy(bcx, dst, src, llsz, llalign as u32);
14761473
} else {
14771474
Store(bcx, Load(bcx, src), dst);
14781475
}

src/librustc/middle/trans/closure.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ pub fn make_opaque_cbox_take_glue(
521521
[opaque_tydesc, sz],
522522
expr::SaveIn(rval));
523523
let cbox_out = PointerCast(bcx, Load(bcx, rval), llopaquecboxty);
524-
call_memcpy(bcx, cbox_out, cbox_in, sz);
524+
call_memcpy(bcx, cbox_out, cbox_in, sz, 1);
525525
Store(bcx, cbox_out, cboxptr);
526526

527527
// Take the (deeply cloned) type descriptor

src/librustc/middle/trans/foreign.rs

+67-29
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,7 @@ pub fn trans_intrinsic(ccx: @CrateContext,
787787
let llsrcptr = PointerCast(bcx, llsrcptr, T_ptr(T_i8()));
788788

789789
let llsize = llsize_of(ccx, llintype);
790-
call_memcpy(bcx, lldestptr, llsrcptr, llsize);
790+
call_memcpy(bcx, lldestptr, llsrcptr, llsize, 1);
791791
}
792792
}
793793
~"needs_drop" => {
@@ -846,44 +846,82 @@ pub fn trans_intrinsic(ccx: @CrateContext,
846846
Store(bcx, morestack_addr, fcx.llretptr.get());
847847
}
848848
~"memcpy32" => {
849-
let dst_ptr = get_param(decl, first_real_arg);
850-
let src_ptr = get_param(decl, first_real_arg + 1);
851-
let size = get_param(decl, first_real_arg + 2);
852-
let align = C_i32(1);
849+
let tp_ty = substs.tys[0];
850+
let lltp_ty = type_of::type_of(ccx, tp_ty);
851+
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
852+
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
853+
854+
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
855+
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
856+
let count = get_param(decl, first_real_arg + 2);
853857
let volatile = C_i1(false);
854-
let llfn = *bcx.ccx().intrinsics.get(
855-
&~"llvm.memcpy.p0i8.p0i8.i32");
856-
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
858+
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i32");
859+
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
857860
}
858861
~"memcpy64" => {
859-
let dst_ptr = get_param(decl, first_real_arg);
860-
let src_ptr = get_param(decl, first_real_arg + 1);
861-
let size = get_param(decl, first_real_arg + 2);
862-
let align = C_i32(1);
862+
let tp_ty = substs.tys[0];
863+
let lltp_ty = type_of::type_of(ccx, tp_ty);
864+
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
865+
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
866+
867+
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
868+
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
869+
let count = get_param(decl, first_real_arg + 2);
863870
let volatile = C_i1(false);
864-
let llfn = *bcx.ccx().intrinsics.get(
865-
&~"llvm.memcpy.p0i8.p0i8.i64");
866-
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
871+
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i64");
872+
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
867873
}
868874
~"memmove32" => {
869-
let dst_ptr = get_param(decl, first_real_arg);
870-
let src_ptr = get_param(decl, first_real_arg + 1);
871-
let size = get_param(decl, first_real_arg + 2);
872-
let align = C_i32(1);
875+
let tp_ty = substs.tys[0];
876+
let lltp_ty = type_of::type_of(ccx, tp_ty);
877+
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
878+
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
879+
880+
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
881+
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
882+
let count = get_param(decl, first_real_arg + 2);
873883
let volatile = C_i1(false);
874-
let llfn = *bcx.ccx().intrinsics.get(
875-
&~"llvm.memmove.p0i8.p0i8.i32");
876-
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
884+
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i32");
885+
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
877886
}
878887
~"memmove64" => {
879-
let dst_ptr = get_param(decl, first_real_arg);
880-
let src_ptr = get_param(decl, first_real_arg + 1);
881-
let size = get_param(decl, first_real_arg + 2);
882-
let align = C_i32(1);
888+
let tp_ty = substs.tys[0];
889+
let lltp_ty = type_of::type_of(ccx, tp_ty);
890+
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
891+
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
892+
893+
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
894+
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
895+
let count = get_param(decl, first_real_arg + 2);
896+
let volatile = C_i1(false);
897+
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i64");
898+
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
899+
}
900+
~"memset32" => {
901+
let tp_ty = substs.tys[0];
902+
let lltp_ty = type_of::type_of(ccx, tp_ty);
903+
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
904+
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
905+
906+
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
907+
let val = get_param(decl, first_real_arg + 1);
908+
let count = get_param(decl, first_real_arg + 2);
909+
let volatile = C_i1(false);
910+
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i32");
911+
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
912+
}
913+
~"memset64" => {
914+
let tp_ty = substs.tys[0];
915+
let lltp_ty = type_of::type_of(ccx, tp_ty);
916+
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
917+
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
918+
919+
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
920+
let val = get_param(decl, first_real_arg + 1);
921+
let count = get_param(decl, first_real_arg + 2);
883922
let volatile = C_i1(false);
884-
let llfn = *bcx.ccx().intrinsics.get(
885-
&~"llvm.memmove.p0i8.p0i8.i64");
886-
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
923+
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i64");
924+
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
887925
}
888926
~"sqrtf32" => {
889927
let x = get_param(decl, first_real_arg);

src/librustc/middle/trans/tvec.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ pub fn duplicate_uniq(bcx: block, vptr: ValueRef, vec_ty: ty::t) -> Result {
125125

126126
let data_ptr = get_dataptr(bcx, get_bodyptr(bcx, vptr));
127127
let new_data_ptr = get_dataptr(bcx, get_bodyptr(bcx, newptr));
128-
base::call_memcpy(bcx, new_data_ptr, data_ptr, fill);
128+
base::call_memcpy(bcx, new_data_ptr, data_ptr, fill, 1);
129129

130130
let bcx = if ty::type_needs_drop(bcx.tcx(), unit_ty) {
131131
iter_vec_raw(bcx, new_data_ptr, vec_ty, fill, glue::take_ty)
@@ -370,7 +370,7 @@ pub fn write_content(bcx: block,
370370
let bytes = s.len() + 1; // copy null-terminator too
371371
let llbytes = C_uint(bcx.ccx(), bytes);
372372
let llcstr = C_cstr(bcx.ccx(), s);
373-
base::call_memcpy(bcx, lldest, llcstr, llbytes);
373+
base::call_memcpy(bcx, lldest, llcstr, llbytes, 1);
374374
return bcx;
375375
}
376376
}

src/librustc/middle/trans/type_use.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ pub fn type_uses_for(ccx: @CrateContext, fn_id: def_id, n_tps: uint)
135135
~"visit_tydesc" | ~"forget" | ~"frame_address" |
136136
~"morestack_addr" => 0,
137137

138-
~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" => 0,
138+
~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" |
139+
~"memset32" | ~"memset64" => use_repr,
139140

140141
~"sqrtf32" | ~"sqrtf64" | ~"powif32" | ~"powif64" |
141142
~"sinf32" | ~"sinf64" | ~"cosf32" | ~"cosf64" |

src/librustc/middle/typeck/check/mod.rs

+36-12
Original file line numberDiff line numberDiff line change
@@ -3538,65 +3538,89 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
35383538
(0u, ~[], ty::mk_nil_ptr(ccx.tcx))
35393539
}
35403540
~"memcpy32" => {
3541-
(0,
3541+
(1,
35423542
~[
35433543
ty::mk_ptr(tcx, ty::mt {
3544-
ty: ty::mk_u8(),
3544+
ty: param(ccx, 0),
35453545
mutbl: ast::m_mutbl
35463546
}),
35473547
ty::mk_ptr(tcx, ty::mt {
3548-
ty: ty::mk_u8(),
3548+
ty: param(ccx, 0),
35493549
mutbl: ast::m_imm
35503550
}),
35513551
ty::mk_u32()
35523552
],
35533553
ty::mk_nil())
35543554
}
35553555
~"memcpy64" => {
3556-
(0,
3556+
(1,
35573557
~[
35583558
ty::mk_ptr(tcx, ty::mt {
3559-
ty: ty::mk_u8(),
3559+
ty: param(ccx, 0),
35603560
mutbl: ast::m_mutbl
35613561
}),
35623562
ty::mk_ptr(tcx, ty::mt {
3563-
ty: ty::mk_u8(),
3563+
ty: param(ccx, 0),
35643564
mutbl: ast::m_imm
35653565
}),
35663566
ty::mk_u64()
35673567
],
35683568
ty::mk_nil())
35693569
}
35703570
~"memmove32" => {
3571-
(0,
3571+
(1,
35723572
~[
35733573
ty::mk_ptr(tcx, ty::mt {
3574-
ty: ty::mk_u8(),
3574+
ty: param(ccx, 0),
35753575
mutbl: ast::m_mutbl
35763576
}),
35773577
ty::mk_ptr(tcx, ty::mt {
3578-
ty: ty::mk_u8(),
3578+
ty: param(ccx, 0),
35793579
mutbl: ast::m_imm
35803580
}),
35813581
ty::mk_u32()
35823582
],
35833583
ty::mk_nil())
35843584
}
35853585
~"memmove64" => {
3586-
(0,
3586+
(1,
35873587
~[
35883588
ty::mk_ptr(tcx, ty::mt {
3589-
ty: ty::mk_u8(),
3589+
ty: param(ccx, 0),
35903590
mutbl: ast::m_mutbl
35913591
}),
35923592
ty::mk_ptr(tcx, ty::mt {
3593-
ty: ty::mk_u8(),
3593+
ty: param(ccx, 0),
35943594
mutbl: ast::m_imm
35953595
}),
35963596
ty::mk_u64()
35973597
],
35983598
ty::mk_nil())
35993599
}
3600+
~"memset32" => {
3601+
(1,
3602+
~[
3603+
ty::mk_ptr(tcx, ty::mt {
3604+
ty: param(ccx, 0),
3605+
mutbl: ast::m_mutbl
3606+
}),
3607+
ty::mk_u8(),
3608+
ty::mk_u32()
3609+
],
3610+
ty::mk_nil())
3611+
}
3612+
~"memset64" => {
3613+
(1,
3614+
~[
3615+
ty::mk_ptr(tcx, ty::mt {
3616+
ty: param(ccx, 0),
3617+
mutbl: ast::m_mutbl
3618+
}),
3619+
ty::mk_u8(),
3620+
ty::mk_u64()
3621+
],
3622+
ty::mk_nil())
3623+
}
36003624
~"sqrtf32" => (0, ~[ ty::mk_f32() ], ty::mk_f32()),
36013625
~"sqrtf64" => (0, ~[ ty::mk_f64() ], ty::mk_f64()),
36023626
~"powif32" => {

src/libstd/cast.rs

+23-1
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ use sys;
1414
use unstable::intrinsics;
1515

1616
/// Casts the value at `src` to U. The two types must have the same length.
17+
#[cfg(stage0)]
1718
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
18-
let mut dest: U = intrinsics::init();
19+
let mut dest: U = intrinsics::uninit();
1920
{
2021
let dest_ptr: *mut u8 = transmute(&mut dest);
2122
let src_ptr: *u8 = transmute(src);
@@ -26,6 +27,26 @@ pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
2627
dest
2728
}
2829

30+
#[cfg(target_word_size = "32", not(stage0))]
31+
#[inline(always)]
32+
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
33+
let mut dest: U = intrinsics::uninit();
34+
let dest_ptr: *mut u8 = transmute(&mut dest);
35+
let src_ptr: *u8 = transmute(src);
36+
intrinsics::memcpy32(dest_ptr, src_ptr, sys::size_of::<U>() as u32);
37+
dest
38+
}
39+
40+
#[cfg(target_word_size = "64", not(stage0))]
41+
#[inline(always)]
42+
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
43+
let mut dest: U = intrinsics::uninit();
44+
let dest_ptr: *mut u8 = transmute(&mut dest);
45+
let src_ptr: *u8 = transmute(src);
46+
intrinsics::memcpy64(dest_ptr, src_ptr, sys::size_of::<U>() as u64);
47+
dest
48+
}
49+
2950
/**
3051
* Move a thing into the void
3152
*
@@ -43,6 +64,7 @@ pub unsafe fn forget<T>(thing: T) { intrinsics::forget(thing); }
4364
* and/or reinterpret_cast when such calls would otherwise scramble a box's
4465
* reference count
4566
*/
67+
#[inline(always)]
4668
pub unsafe fn bump_box_refcount<T>(t: @T) { forget(t); }
4769

4870
/**

0 commit comments

Comments
 (0)