Skip to content

Commit 9348c33

Browse files
committed
A more efficient slice comparison implementation for T: !BytewiseEq
The previous implementation was not optimized properly by the compiler, which didn't leverage the fact that both lengths were equal.
1 parent 347452e commit 9348c33

File tree

4 files changed

+162
-1
lines changed

4 files changed

+162
-1
lines changed

library/core/src/slice/cmp.rs

+18-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,24 @@ where
6060
return false;
6161
}
6262

63-
self.iter().zip(other.iter()).all(|(x, y)| x == y)
63+
let mut i = self.len();
64+
let mut ptr_self = self.as_ptr();
65+
let mut ptr_other = other.as_ptr();
66+
// SAFETY:
67+
// This is sound because:
68+
// - self.len == other.len
69+
// - self.len <= isize::MAX
70+
// so the two pointers will not overflow,
71+
// will remain in bounds of the slice,
72+
// and dereferencing them is sound.
73+
unsafe {
74+
while (i > 0) && (*ptr_self == *ptr_other) {
75+
i -= 1;
76+
ptr_self = ptr_self.add(1);
77+
ptr_other = ptr_other.add(1);
78+
}
79+
}
80+
i == 0
6481
}
6582
}
6683

mre.ll

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; ModuleID = 'mre.44c9d9ceb0d5fb53-cgu.0'
2+
source_filename = "mre.44c9d9ceb0d5fb53-cgu.0"
3+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
; Function Attrs: nofree nosync nounwind nonlazybind memory(read) uwtable
7+
define noundef zeroext i1 @test(ptr noalias nocapture noundef nonnull readonly align 8 %0, i64 noundef %1, ptr noalias nocapture noundef nonnull readonly align 8 %2, i64 noundef %3) unnamed_addr #0 {
8+
start:
9+
tail call void @llvm.experimental.noalias.scope.decl(metadata !3)
10+
tail call void @llvm.experimental.noalias.scope.decl(metadata !6)
11+
%_3.not.i = icmp eq i64 %1, %3
12+
br i1 %_3.not.i, label %bb3.preheader.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit"
13+
14+
bb3.preheader.i: ; preds = %start
15+
%_9.not4.i = icmp eq i64 %1, 0
16+
br i1 %_9.not4.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit", label %bb4.i
17+
18+
bb4.i: ; preds = %bb3.preheader.i, %bb6.i
19+
%i.07.i = phi i64 [ %6, %bb6.i ], [ %1, %bb3.preheader.i ]
20+
%ptr_self.06.i = phi ptr [ %_14.i, %bb6.i ], [ %0, %bb3.preheader.i ]
21+
%ptr_other.05.i = phi ptr [ %_16.i, %bb6.i ], [ %2, %bb3.preheader.i ]
22+
tail call void @llvm.experimental.noalias.scope.decl(metadata !8)
23+
tail call void @llvm.experimental.noalias.scope.decl(metadata !11)
24+
tail call void @llvm.experimental.noalias.scope.decl(metadata !13)
25+
tail call void @llvm.experimental.noalias.scope.decl(metadata !16)
26+
%_5.i.i.i = load i64, ptr %ptr_self.06.i, align 8, !range !18, !alias.scope !19, !noalias !20, !noundef !21
27+
%trunc.not.i.i.i = icmp eq i64 %_5.i.i.i, 0
28+
%_3.i.i.i = load i64, ptr %ptr_other.05.i, align 8, !range !18, !alias.scope !20, !noalias !19, !noundef !21
29+
%4 = icmp eq i64 %_3.i.i.i, 0
30+
%brmerge.i.i.i = or i1 %trunc.not.i.i.i, %4
31+
br i1 %brmerge.i.i.i, label %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i", label %bb4.i.i.i
32+
33+
bb4.i.i.i: ; preds = %bb4.i
34+
%_8.i.i.i = getelementptr inbounds { i64, i64 }, ptr %ptr_self.06.i, i64 0, i32 1
35+
%_9.i.i.i = getelementptr inbounds { i64, i64 }, ptr %ptr_other.05.i, i64 0, i32 1
36+
tail call void @llvm.experimental.noalias.scope.decl(metadata !22)
37+
tail call void @llvm.experimental.noalias.scope.decl(metadata !25)
38+
%_3.i.i.i.i = load i64, ptr %_8.i.i.i, align 8, !alias.scope !27, !noalias !28, !noundef !21
39+
%_4.i.i.i.i = load i64, ptr %_9.i.i.i, align 8, !alias.scope !28, !noalias !27, !noundef !21
40+
%_0.i.i.i.i = icmp eq i64 %_3.i.i.i.i, %_4.i.i.i.i
41+
br i1 %_0.i.i.i.i, label %bb6.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit"
42+
43+
"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i": ; preds = %bb4.i
44+
%5 = or i64 %_3.i.i.i, %_5.i.i.i
45+
%.mux.i.i.i = icmp eq i64 %5, 0
46+
br i1 %.mux.i.i.i, label %bb6.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit"
47+
48+
bb6.i: ; preds = %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i", %bb4.i.i.i
49+
%6 = add i64 %i.07.i, -1
50+
%_14.i = getelementptr inbounds { i64, i64 }, ptr %ptr_self.06.i, i64 1
51+
%_16.i = getelementptr inbounds { i64, i64 }, ptr %ptr_other.05.i, i64 1
52+
%_9.not.i = icmp eq i64 %6, 0
53+
br i1 %_9.not.i, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit", label %bb4.i
54+
55+
"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E.exit": ; preds = %bb4.i.i.i, %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i", %bb6.i, %start, %bb3.preheader.i
56+
%_0.0.i = phi i1 [ false, %start ], [ true, %bb3.preheader.i ], [ true, %bb6.i ], [ false, %"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E.exit.i" ], [ false, %bb4.i.i.i ]
57+
ret i1 %_0.0.i
58+
}
59+
60+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
61+
declare void @llvm.experimental.noalias.scope.decl(metadata) #1
62+
63+
attributes #0 = { nofree nosync nounwind nonlazybind memory(read) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
64+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
65+
66+
!llvm.module.flags = !{!0, !1}
67+
!llvm.ident = !{!2}
68+
69+
!0 = !{i32 8, !"PIC Level", i32 2}
70+
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
71+
!2 = !{!"rustc version 1.74.0-beta.1 (b5c050feb 2023-10-03)"}
72+
!3 = !{!4}
73+
!4 = distinct !{!4, !5, !"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E: %self.0"}
74+
!5 = distinct !{!5, !"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E"}
75+
!6 = !{!7}
76+
!7 = distinct !{!7, !5, !"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h92b3a432973c4ce3E: %other.0"}
77+
!8 = !{!9}
78+
!9 = distinct !{!9, !10, !"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E: %self"}
79+
!10 = distinct !{!10, !"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E"}
80+
!11 = !{!12}
81+
!12 = distinct !{!12, !10, !"_ZN70_$LT$core..option..Option$LT$T$GT$$u20$as$u20$core..cmp..PartialEq$GT$2eq17h839ffac60ba8c424E: %other"}
82+
!13 = !{!14}
83+
!14 = distinct !{!14, !15, !"_ZN55_$LT$T$u20$as$u20$core..option..SpecOptionPartialEq$GT$2eq17h42d3740d8c76b9d0E: %l"}
84+
!15 = distinct !{!15, !"_ZN55_$LT$T$u20$as$u20$core..option..SpecOptionPartialEq$GT$2eq17h42d3740d8c76b9d0E"}
85+
!16 = !{!17}
86+
!17 = distinct !{!17, !15, !"_ZN55_$LT$T$u20$as$u20$core..option..SpecOptionPartialEq$GT$2eq17h42d3740d8c76b9d0E: %r"}
87+
!18 = !{i64 0, i64 2}
88+
!19 = !{!14, !9, !4}
89+
!20 = !{!17, !12, !7}
90+
!21 = !{}
91+
!22 = !{!23}
92+
!23 = distinct !{!23, !24, !"_ZN4core3cmp5impls54_$LT$impl$u20$core..cmp..PartialEq$u20$for$u20$u64$GT$2eq17hcb36ad6f45b649e4E: %self"}
93+
!24 = distinct !{!24, !"_ZN4core3cmp5impls54_$LT$impl$u20$core..cmp..PartialEq$u20$for$u20$u64$GT$2eq17hcb36ad6f45b649e4E"}
94+
!25 = !{!26}
95+
!26 = distinct !{!26, !24, !"_ZN4core3cmp5impls54_$LT$impl$u20$core..cmp..PartialEq$u20$for$u20$u64$GT$2eq17hcb36ad6f45b649e4E: %other"}
96+
!27 = !{!23, !14, !9, !4}
97+
!28 = !{!26, !17, !12, !7}

mre.rs

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#[no_mangle]
2+
pub fn test(x: &[Option<u64>], y: &[Option<u64>]) -> bool {
3+
x == y
4+
}

mre.s

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
.text
2+
.file "mre.44c9d9ceb0d5fb53-cgu.0"
3+
.section .text.test,"ax",@progbits
4+
.globl test
5+
.p2align 4, 0x90
6+
.type test,@function
7+
test:
8+
.cfi_startproc
9+
cmpq %rcx, %rsi
10+
jne .LBB0_9
11+
movb $1, %al
12+
testq %rsi, %rsi
13+
je .LBB0_10
14+
movl $8, %ecx
15+
jmp .LBB0_5
16+
.p2align 4, 0x90
17+
.LBB0_3:
18+
orq %r8, %r9
19+
jne .LBB0_9
20+
.LBB0_4:
21+
addq $16, %rcx
22+
decq %rsi
23+
je .LBB0_10
24+
.LBB0_5:
25+
movq -8(%rdi,%rcx), %r8
26+
movq -8(%rdx,%rcx), %r9
27+
testq %r8, %r8
28+
je .LBB0_3
29+
testq %r9, %r9
30+
je .LBB0_3
31+
movq (%rdi,%rcx), %r8
32+
cmpq (%rdx,%rcx), %r8
33+
je .LBB0_4
34+
.LBB0_9:
35+
xorl %eax, %eax
36+
.LBB0_10:
37+
retq
38+
.Lfunc_end0:
39+
.size test, .Lfunc_end0-test
40+
.cfi_endproc
41+
42+
.ident "rustc version 1.74.0-beta.1 (b5c050feb 2023-10-03)"
43+
.section ".note.GNU-stack","",@progbits

0 commit comments

Comments
 (0)