Skip to content

Commit f4528dd

Browse files
eduardosmAmanieu
authored andcommitted
Fix UB in _mm_movemask_ps, _mm_movemask_pd, _mm256_movemask_ps and _mm256_movemask_pd
The `simd_bitmask` intrinsic requires each element to be all-1 or all-0, while the x86 functions only check for the highest bit.
1 parent 79a4109 commit f4528dd

File tree

3 files changed

+16
-4
lines changed

3 files changed

+16
-4
lines changed

crates/core_arch/src/x86/avx.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -2066,7 +2066,10 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
20662066
#[cfg_attr(test, assert_instr(vmovmskpd))]
20672067
#[stable(feature = "simd_x86", since = "1.27.0")]
20682068
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
2069-
simd_bitmask::<u64x4, u8>(transmute(a)).into()
2069+
// Propagate the highest bit to the rest, because simd_bitmask
2070+
// requires all-1 or all-0.
2071+
let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
2072+
simd_bitmask::<i64x4, u8>(mask).into()
20702073
}
20712074

20722075
/// Sets each bit of the returned mask based on the most significant bit of the
@@ -2079,7 +2082,10 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
20792082
#[cfg_attr(test, assert_instr(vmovmskps))]
20802083
#[stable(feature = "simd_x86", since = "1.27.0")]
20812084
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
2082-
simd_bitmask::<u32x8, u8>(transmute(a)).into()
2085+
// Propagate the highest bit to the rest, because simd_bitmask
2086+
// requires all-1 or all-0.
2087+
let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
2088+
simd_bitmask::<i32x8, u8>(mask).into()
20832089
}
20842090

20852091
/// Returns vector of type __m256d with all elements set to zero.

crates/core_arch/src/x86/sse.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -1081,7 +1081,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
10811081
#[cfg_attr(test, assert_instr(movmskps))]
10821082
#[stable(feature = "simd_x86", since = "1.27.0")]
10831083
pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
1084-
simd_bitmask::<u32x4, u8>(transmute(a)).into()
1084+
// Propagate the highest bit to the rest, because simd_bitmask
1085+
// requires all-1 or all-0.
1086+
let mask: i32x4 = simd_lt(transmute(a), i32x4::splat(0));
1087+
simd_bitmask::<i32x4, u8>(mask).into()
10851088
}
10861089

10871090
/// Construct a `__m128` with the lowest element read from `p` and the other

crates/core_arch/src/x86/sse2.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -2450,7 +2450,10 @@ pub unsafe fn _mm_setzero_pd() -> __m128d {
24502450
#[cfg_attr(test, assert_instr(movmskpd))]
24512451
#[stable(feature = "simd_x86", since = "1.27.0")]
24522452
pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
2453-
simd_bitmask::<u64x2, u8>(transmute(a)).into()
2453+
// Propagate the highest bit to the rest, because simd_bitmask
2454+
// requires all-1 or all-0.
2455+
let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0));
2456+
simd_bitmask::<i64x2, u8>(mask).into()
24542457
}
24552458

24562459
/// Loads 128-bits (composed of 2 packed double-precision (64-bit)

0 commit comments

Comments
 (0)