Skip to content

Commit 87916f8

Browse files
authored
[CodeGen][NPM] Port MachineBlockPlacement to NPM (#129828)
1 parent 42748a4 commit 87916f8

11 files changed

+173
-37
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//===- llvm/CodeGen/MachineBlockPlacement.h ---------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
10+
#define LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
11+
12+
#include "llvm/CodeGen/MachinePassManager.h"
13+
14+
namespace llvm {
15+
16+
class MachineBlockPlacementPass
17+
: public PassInfoMixin<MachineBlockPlacementPass> {
18+
19+
bool AllowTailMerge = true;
20+
21+
public:
22+
MachineBlockPlacementPass(bool AllowTailMerge)
23+
: AllowTailMerge(AllowTailMerge) {}
24+
PreservedAnalyses run(MachineFunction &MF,
25+
MachineFunctionAnalysisManager &MFAM);
26+
static bool isRequired() { return true; }
27+
28+
void
29+
printPipeline(raw_ostream &OS,
30+
function_ref<StringRef(StringRef)> MapClassName2PassName) const;
31+
};
32+
33+
} // namespace llvm
34+
35+
#endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H

llvm/include/llvm/InitializePasses.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
184184
void initializeMIRNamerPass(PassRegistry &);
185185
void initializeMIRPrintingPassPass(PassRegistry &);
186186
void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
187-
void initializeMachineBlockPlacementPass(PassRegistry &);
187+
void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
188188
void initializeMachineBlockPlacementStatsPass(PassRegistry &);
189189
void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
190190
void initializeMachineCFGPrinterPass(PassRegistry &);

llvm/include/llvm/Passes/CodeGenPassBuilder.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "llvm/CodeGen/LocalStackSlotAllocation.h"
4949
#include "llvm/CodeGen/LowerEmuTLS.h"
5050
#include "llvm/CodeGen/MIRPrinter.h"
51+
#include "llvm/CodeGen/MachineBlockPlacement.h"
5152
#include "llvm/CodeGen/MachineCSE.h"
5253
#include "llvm/CodeGen/MachineCopyPropagation.h"
5354
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -1226,7 +1227,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineLateOptimization(
12261227
template <typename Derived, typename TargetMachineT>
12271228
void CodeGenPassBuilder<Derived, TargetMachineT>::addBlockPlacement(
12281229
AddMachinePass &addPass) const {
1229-
addPass(MachineBlockPlacementPass());
1230+
addPass(MachineBlockPlacementPass(Opt.EnableTailMerge));
12301231
// Run a separate pass to collect block placement statistics.
12311232
if (Opt.EnableBlockPlacementStats)
12321233
addPass(MachineBlockPlacementStatsPass());

llvm/include/llvm/Passes/MachinePassRegistry.def

+9-1
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
195195
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
196196
PARAMS)
197197
#endif
198+
199+
MACHINE_FUNCTION_PASS_WITH_PARAMS(
200+
"block-placement", "MachineBlockPlacementPass",
201+
[](bool AllowTailMerge) {
202+
// AllowTailMerge defaults to true when no parameter is given.
203+
return MachineBlockPlacementPass(AllowTailMerge);
204+
},
205+
parseMachineBlockPlacementPassOptions, "no-tail-merge;tail-merge")
206+
198207
MACHINE_FUNCTION_PASS_WITH_PARAMS(
199208
"branch-folder", "BranchFolderPass",
200209
[](bool EnableTailMerge) { return BranchFolderPass(EnableTailMerge); },
@@ -253,7 +262,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", StripDebugMachineModulePass)
253262
#endif
254263
DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
255264
DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass)
256-
DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass)
257265
DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass)
258266
DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
259267
DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass)

llvm/lib/CodeGen/CodeGen.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
7272
initializeMIRNamerPass(Registry);
7373
initializeMIRProfileLoaderPassPass(Registry);
7474
initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
75-
initializeMachineBlockPlacementPass(Registry);
75+
initializeMachineBlockPlacementLegacyPass(Registry);
7676
initializeMachineBlockPlacementStatsPass(Registry);
7777
initializeMachineCFGPrinterPass(Registry);
7878
initializeMachineCSELegacyPass(Registry);

llvm/lib/CodeGen/MachineBlockPlacement.cpp

+94-33
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
//
2525
//===----------------------------------------------------------------------===//
2626

27+
#include "llvm/CodeGen/MachineBlockPlacement.h"
2728
#include "BranchFolding.h"
2829
#include "llvm/ADT/ArrayRef.h"
2930
#include "llvm/ADT/DenseMap.h"
@@ -357,7 +358,7 @@ class BlockChain {
357358
unsigned UnscheduledPredecessors = 0;
358359
};
359360

360-
class MachineBlockPlacement : public MachineFunctionPass {
361+
class MachineBlockPlacement {
361362
/// A type for a block filter set.
362363
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
363364

@@ -409,7 +410,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
409410

410411
ProfileSummaryInfo *PSI = nullptr;
411412

412-
TargetPassConfig *PassConfig = nullptr;
413+
// Requested tail-merge setting. Tail merging is additionally disabled
414+
// when the target requires a structured CFG.
415+
bool AllowTailMerge;
416+
417+
CodeGenOptLevel OptLevel;
413418

414419
/// Duplicator used to duplicate tails during placement.
415420
///
@@ -608,18 +613,48 @@ class MachineBlockPlacement : public MachineFunctionPass {
608613
/// Create a single CFG chain from the current block order.
609614
void createCFGChainExtTsp();
610615

616+
public:
617+
MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI,
618+
MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
619+
std::unique_ptr<MBFIWrapper> MBFI,
620+
MachinePostDominatorTree *MPDT, bool AllowTailMerge)
621+
: MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
622+
AllowTailMerge(AllowTailMerge) {};
623+
624+
bool run(MachineFunction &F);
625+
626+
/// Returns true when tail-duplication placement may be applied to \p MF:
/// the TailDupPlacement option must be set and the target must not require
/// a structured CFG.
static bool allowTailDupPlacement(MachineFunction &MF) {
  // Check the option first so the target is only queried when it matters.
  if (!TailDupPlacement)
    return false;
  return !MF.getTarget().requiresStructuredCFG();
}
629+
};
630+
631+
class MachineBlockPlacementLegacy : public MachineFunctionPass {
611632
public:
612633
static char ID; // Pass identification, replacement for typeid
613634

614-
MachineBlockPlacement() : MachineFunctionPass(ID) {
615-
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
635+
MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
636+
initializeMachineBlockPlacementLegacyPass(*PassRegistry::getPassRegistry());
616637
}
617638

618-
bool runOnMachineFunction(MachineFunction &F) override;
639+
// Legacy pass manager entry point: after the skipFunction() check, gathers
// the wrapper-pass analyses and hands them to a temporary
// MachineBlockPlacement object, returning the result of its run().
bool runOnMachineFunction(MachineFunction &MF) override {
640+
if (skipFunction(MF.getFunction()))
641+
return false;
619642

620-
bool allowTailDupPlacement() const {
621-
assert(F);
622-
return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
643+
auto *MBPI =
644+
&getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
645+
auto MBFI = std::make_unique<MBFIWrapper>(
646+
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
647+
auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
648+
// The post-dominator tree is fetched only when tail-duplication placement
// is allowed for MF; otherwise a null pointer is passed on.
auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
649+
? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
650+
.getPostDomTree()
651+
: nullptr;
652+
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
653+
// The tail-merge setting comes from TargetPassConfig in the legacy pipeline.
auto *PassConfig = &getAnalysis<TargetPassConfig>();
654+
bool AllowTailMerge = PassConfig->getEnableTailMerge();
655+
return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
656+
AllowTailMerge)
657+
.run(MF);
623658
}
624659

625660
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -636,18 +671,18 @@ class MachineBlockPlacement : public MachineFunctionPass {
636671

637672
} // end anonymous namespace
638673

639-
char MachineBlockPlacement::ID = 0;
674+
char MachineBlockPlacementLegacy::ID = 0;
640675

641-
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
676+
char &llvm::MachineBlockPlacementID = MachineBlockPlacementLegacy::ID;
642677

643-
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
678+
INITIALIZE_PASS_BEGIN(MachineBlockPlacementLegacy, DEBUG_TYPE,
644679
"Branch Probability Basic Block Placement", false, false)
645680
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
646681
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
647682
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
648683
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
649684
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
650-
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
685+
INITIALIZE_PASS_END(MachineBlockPlacementLegacy, DEBUG_TYPE,
651686
"Branch Probability Basic Block Placement", false, false)
652687

653688
#ifndef NDEBUG
@@ -1130,7 +1165,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
11301165
MachineBasicBlock *Succ1 = BestA.Dest;
11311166
MachineBasicBlock *Succ2 = BestB.Dest;
11321167
// Check to see if tail-duplication would be profitable.
1133-
if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1168+
if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) &&
11341169
canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
11351170
isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
11361171
Chain, BlockFilter)) {
@@ -1655,7 +1690,7 @@ MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
16551690
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
16561691
Chain, BlockFilter)) {
16571692
// If tail duplication would make Succ profitable, place it.
1658-
if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1693+
if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ))
16591694
DupCandidates.emplace_back(SuccProb, Succ);
16601695
continue;
16611696
}
@@ -1883,7 +1918,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
18831918
auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
18841919
MachineBasicBlock *BestSucc = Result.BB;
18851920
bool ShouldTailDup = Result.ShouldTailDup;
1886-
if (allowTailDupPlacement())
1921+
if (allowTailDupPlacement(*F))
18871922
ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
18881923
BB, BestSucc, Chain, BlockFilter));
18891924

@@ -1910,7 +1945,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
19101945

19111946
// Placement may have changed tail duplication opportunities.
19121947
// Check for that now.
1913-
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1948+
if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) {
19141949
repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
19151950
BlockFilter, PrevUnplacedBlockIt,
19161951
PrevUnplacedBlockInFilterIt);
@@ -3466,7 +3501,7 @@ void MachineBlockPlacement::initTailDupThreshold() {
34663501

34673502
// For aggressive optimization, we can adjust some thresholds to be less
34683503
// conservative.
3469-
if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {
3504+
if (OptLevel >= CodeGenOptLevel::Aggressive) {
34703505
// At O3 we should be more willing to copy blocks for tail duplication. This
34713506
// increases size pressure, so we only do it at O3
34723507
// Do this unless only the regular threshold is explicitly set.
@@ -3478,29 +3513,56 @@ void MachineBlockPlacement::initTailDupThreshold() {
34783513
// If there's no threshold provided through options, query the target
34793514
// information for a threshold instead.
34803515
if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
3481-
(PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||
3516+
(OptLevel < CodeGenOptLevel::Aggressive ||
34823517
TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
3483-
TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
3518+
TailDupSize = TII->getTailDuplicateSize(OptLevel);
34843519
}
34853520

3486-
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
3487-
if (skipFunction(MF.getFunction()))
3488-
return false;
3521+
/// New pass manager entry point: collects the analyses block placement
/// needs, runs the shared MachineBlockPlacement implementation on \p MF and
/// reports which analyses are preserved.
PreservedAnalyses
MachineBlockPlacementPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  auto &BranchProbs = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
  auto BlockFreqs = std::make_unique<MBFIWrapper>(
      MFAM.getResult<MachineBlockFrequencyAnalysis>(MF));
  auto &Loops = MFAM.getResult<MachineLoopAnalysis>(MF);

  // The post-dominator tree is only requested when tail-duplication placement
  // is allowed for this function; otherwise the implementation gets null.
  MachinePostDominatorTree *PostDomTree = nullptr;
  if (MachineBlockPlacement::allowTailDupPlacement(MF))
    PostDomTree = &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF);

  // Only a cached profile summary is consulted through the module proxy, so
  // its absence is reported as a fatal error.
  auto &ModuleProxy =
      MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF);
  auto *ProfileSummary = ModuleProxy.getCachedResult<ProfileSummaryAnalysis>(
      *MF.getFunction().getParent());
  if (!ProfileSummary)
    report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis",
                       false);

  MachineBlockPlacement Placement(&BranchProbs, &Loops, ProfileSummary,
                                  std::move(BlockFreqs), PostDomTree,
                                  AllowTailMerge);

  // An unchanged function preserves every analysis.
  const bool Changed = Placement.run(MF);
  return Changed ? getMachineFunctionPassPreservedAnalyses()
                 : PreservedAnalyses::all();
}
3546+
3547+
/// Prints the mapped pass name to \p OS, followed by "<no-tail-merge>" when
/// tail merging is disabled. Nothing is appended when it is enabled.
void MachineBlockPlacementPass::printPipeline(
    raw_ostream &OS,
    function_ref<StringRef(StringRef)> MapClassName2PassName) const {
  const char *ParamSuffix = AllowTailMerge ? "" : "<no-tail-merge>";
  OS << MapClassName2PassName(name()) << ParamSuffix;
}
3554+
3555+
bool MachineBlockPlacement::run(MachineFunction &MF) {
34893556

34903557
// Check for single-block functions and skip them.
34913558
if (std::next(MF.begin()) == MF.end())
34923559
return false;
34933560

34943561
F = &MF;
3495-
MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3496-
MBFI = std::make_unique<MBFIWrapper>(
3497-
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
3498-
MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
3562+
OptLevel = F->getTarget().getOptLevel();
3563+
34993564
TII = MF.getSubtarget().getInstrInfo();
35003565
TLI = MF.getSubtarget().getTargetLowering();
3501-
MPDT = nullptr;
3502-
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3503-
PassConfig = &getAnalysis<TargetPassConfig>();
35043566

35053567
// Initialize PreferredLoopExit to nullptr here since it may never be set if
35063568
// there are no MachineLoops.
@@ -3529,8 +3591,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
35293591
}
35303592

35313593
// Apply tail duplication.
3532-
if (allowTailDupPlacement()) {
3533-
MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
3594+
if (allowTailDupPlacement(*F)) {
35343595
if (OptForSize)
35353596
TailDupSize = 1;
35363597
const bool PreRegAlloc = false;
@@ -3548,8 +3609,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
35483609
// TailMerge can create jump into if branches that make CFG irreducible for
35493610
// HW that requires structured CFG.
35503611
const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
3551-
PassConfig->getEnableTailMerge() &&
3552-
BranchFoldPlacement && MF.size() > 3;
3612+
AllowTailMerge && BranchFoldPlacement &&
3613+
MF.size() > 3;
35533614
// No tail merging opportunities if the block number is less than four.
35543615
if (EnableTailMerge) {
35553616
const unsigned TailMergeSize = TailDupSize + 1;

llvm/lib/Passes/PassBuilder.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
#include "llvm/CodeGen/LowerEmuTLS.h"
113113
#include "llvm/CodeGen/MIRPrinter.h"
114114
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
115+
#include "llvm/CodeGen/MachineBlockPlacement.h"
115116
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
116117
#include "llvm/CodeGen/MachineCSE.h"
117118
#include "llvm/CodeGen/MachineCopyPropagation.h"
@@ -1443,6 +1444,19 @@ Expected<bool> parseMachineSinkingPassOptions(StringRef Params) {
14431444
"MachineSinkingPass");
14441445
}
14451446

1447+
/// Parses the textual parameter of the block-placement pass.
///
/// Accepts an empty string (tail merging stays allowed), "tail-merge" or
/// "no-tail-merge". Any other text produces a StringError.
///
/// \returns whether tail merging is allowed, or an error for invalid input.
Expected<bool> parseMachineBlockPlacementPassOptions(StringRef Params) {
  bool AllowTailMerge = true;
  if (!Params.empty()) {
    // consume_front() mutates Params; keep the text as written so the
    // diagnostic quotes what the user typed, not the remainder after "no-".
    const StringRef OriginalParams = Params;
    AllowTailMerge = !Params.consume_front("no-");
    if (Params != "tail-merge")
      return make_error<StringError>(
          formatv("invalid MachineBlockPlacementPass parameter '{0}' ",
                  OriginalParams)
              .str(),
          inconvertibleErrorCode());
  }
  return AllowTailMerge;
}
1459+
14461460
} // namespace
14471461

14481462
/// Tests whether a pass name starts with a valid prefix for a default pipeline

llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple aarch64-none-elf -run-pass=block-placement -O3 -o - %s | FileCheck %s
3+
# RUN: llc -mtriple aarch64-none-elf -passes='require<profile-summary>,function(machine-function(block-placement))' -O3 -o - %s | FileCheck %s
34

45
## Check that block-placement does not perform tail duplication on the
56
## PAUTH_EPILOGUE instruction. If that happened, the two prologues would use

llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
33
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
44

5+
# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
6+
57
# Used to fail with
68
# Assertion `Out && "Header of loop has no predecessors from outside loop?"
79

llvm/test/CodeGen/X86/block-placement.mir

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -run-pass=block-placement -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -passes='require<profile-summary>,function(machine-function(block-placement))' -o - %s | FileCheck %s
23

34
--- |
45
; ModuleID = 'test.ll'

0 commit comments

Comments
 (0)